From: Michael R. Crusoe <crusoe@debian.org>
Subject: Switch to pytest from unsupported nose
Forwarded: https://github.com/jmschrei/pomegranate/pull/1006
--- python-pomegranate.orig/tests/test_bayes_classifier.py
+++ python-pomegranate/tests/test_bayes_classifier.py
@@ -5,32 +5,27 @@
 from pomegranate.io import DataGenerator
 from pomegranate.io import DataFrameGenerator
 
-from .tools import with_setup
-from .tools import assert_equal
-from .tools import assert_not_equal
-from .tools import assert_true
-
 from numpy.testing import assert_almost_equal
 from numpy.testing import assert_array_almost_equal
 
 import pandas
-import random
 import pickle
 import numpy as np
 
-nan = numpy.nan
+import pytest
+
+nan = np.nan
 
-def setup_multivariate_gaussian():
+@pytest.fixture
+def multivariate_gaussian():
 	mu, cov = [0, 0, 0], numpy.eye(3)
 	d1 = MultivariateGaussianDistribution(mu, cov)
 
 	mu, cov = [2, 2, 2], numpy.eye(3)
 	d2 = MultivariateGaussianDistribution(mu, cov)
 
-	global model
 	model = BayesClassifier([d1, d2])
 
-	global X
 	X = numpy.array([[ 0.3,  0.5,  0.1],
 					 [ 0.8,  1.4,  0.5],
 					 [ 1.4,  2.6,  1.8],
@@ -42,10 +37,8 @@
 					 [-1.8,  0.3,  0.5],
 					 [ 0.7, -1.3, -0.1]])
 
-	global y
 	y = [0, 0, 0, 1, 1, 1, 1, 0, 0, 0]
 
-	global X_nan
 	X_nan = numpy.array([[ 0.3,  nan,  0.1],
 		     			 [ nan,  1.4,  nan],
 			     		 [ 1.4,  2.6,  nan],
@@ -56,9 +49,11 @@
 						 [-1.2, -1.8, -1.5],
 						 [ nan,  0.3,  0.5],
 						 [ nan, -1.3,  nan]])
+	return model, X, y, X_nan
 
 
-def setup_multivariate_mixed():
+@pytest.fixture
+def multivariate_mixed():
 	mu, cov = [0, 0, 0], numpy.eye(3)
 	d1 = MultivariateGaussianDistribution(mu, cov)
 
@@ -67,10 +62,8 @@
 	d23 = PoissonDistribution(3)
 	d2 = IndependentComponentsDistribution([d21, d22, d23])
 
-	global model
 	model = BayesClassifier([d1, d2])
 
-	global X
 	X = numpy.array([[ 0.3,  0.5,  0.1],
 					 [ 0.8,  1.4,  0.5],
 					 [ 1.4,  2.6,  1.8],
@@ -82,10 +75,8 @@
 					 [ 1.8,  0.3,  0.5],
 					 [ 0.7,  1.3,  0.1]])
 
-	global y
 	y = [0, 0, 0, 1, 1, 1, 1, 0, 0, 0]
 
-	global X_nan
 	X_nan = numpy.array([[ 0.3,  nan,  0.1],
 		     			 [ nan,  1.4,  nan],
 			     		 [ 1.4,  2.6,  nan],
@@ -96,14 +87,11 @@
 						 [ 1.2,  1.8,  1.5],
 						 [ nan,  0.3,  0.5],
 						 [ nan,  1.3,  nan]])
+	return model, X, y, X_nan
 
 
-def setup_hmm():
-	global model
-	global hmm1
-	global hmm2
-	global hmm3
-
+@pytest.fixture
+def hmm():
 	rigged = State( DiscreteDistribution({ 'H': 0.8, 'T': 0.2 }) )
 	unrigged = State( DiscreteDistribution({ 'H': 0.5, 'T':0.5 }) )
 
@@ -127,15 +115,9 @@
 	hmm3.bake()
 
 	model = BayesClassifier([hmm1, hmm2, hmm3])
+	return model, hmm1, hmm2, hmm3
 
 
-def setup_multivariate():
-	pass
-
-
-def teardown():
-	pass
-
 def test_unpickle_bayes_model():
 	"""Test that `BayesModel` can be pickled and unpickled."""
 	dists = [BernoulliDistribution(0.2), BernoulliDistribution(0.3)]
@@ -152,21 +134,21 @@
 		unpickled_model.distributions[1].parameters,
 	)
 
-@with_setup(setup_multivariate_gaussian, teardown)
-def test_bc_multivariate_gaussian_initialization():
-	assert_equal(model.d, 3)
-	assert_equal(model.n, 2)
-	assert_equal(model.is_vl_, False)
-
-@with_setup(setup_multivariate_mixed, teardown)
-def test_bc_multivariate_mixed_initialization():
-	assert_equal(model.d, 3)
-	assert_equal(model.n, 2)
-	assert_equal(model.is_vl_, False)
+def test_bc_multivariate_gaussian_initialization(multivariate_gaussian):
+	model, X, y, X_nan = multivariate_gaussian
+	assert model.d == 3
+	assert model.n == 2
+	assert model.is_vl_ == False
+
+def test_bc_multivariate_mixed_initialization(multivariate_mixed):
+	model, X, y, X_nan = multivariate_mixed
+	assert model.d == 3
+	assert model.n == 2
+	assert model.is_vl_ == False
 
 
-@with_setup(setup_multivariate_gaussian, teardown)
-def test_bc_multivariate_gaussian_predict_log_proba():
+def test_bc_multivariate_gaussian_predict_log_proba(multivariate_gaussian):
+	model, X, y, X_nan = multivariate_gaussian
 	y_hat = model.predict_log_proba(X)
 	y = [[ -1.48842547e-02,  -4.21488425e+00],
 		 [ -4.37487950e-01,  -1.03748795e+00],
@@ -182,8 +164,8 @@
 	assert_array_almost_equal(y, y_hat)
 
 
-@with_setup(setup_multivariate_mixed, teardown)
-def test_bc_multivariate_mixed_predict_log_proba():
+def test_bc_multivariate_mixed_predict_log_proba(multivariate_mixed):
+	model, X, y, X_nan = multivariate_mixed
 	y_hat = model.predict_log_proba(X)
 	y = [[ -5.03107596e-01,  -9.27980626e-01],
 		 [ -1.86355320e-01,  -1.77183117e+00],
@@ -199,8 +181,8 @@
 	assert_array_almost_equal(y, y_hat)
 
 
-@with_setup(setup_multivariate_gaussian, teardown)
-def test_bc_multivariate_gaussian_nan_predict_log_proba():
+def test_bc_multivariate_gaussian_nan_predict_log_proba(multivariate_gaussian):
+	model, X, y, X_nan = multivariate_gaussian
 	y_hat = model.predict_log_proba(X_nan)
 	y = [[ -3.99533332e-02,  -3.23995333e+00],
 		 [ -1.17110067e+00,  -3.71100666e-01],
@@ -216,8 +198,8 @@
 	assert_array_almost_equal(y, y_hat)
 
 
-@with_setup(setup_multivariate_mixed, teardown)
-def test_bc_multivariate_mixed_nan_predict_log_proba():
+def test_bc_multivariate_mixed_nan_predict_log_proba(multivariate_mixed):
+	model, X, y, X_nan = multivariate_mixed
 	y_hat = model.predict_log_proba(X_nan)
 	y = [[ -3.57980882e-01,  -1.20093223e+00],
 		 [ -1.20735130e+00,  -3.55230506e-01],
@@ -233,8 +215,8 @@
 	assert_array_almost_equal(y, y_hat)
 
 
-@with_setup(setup_multivariate_gaussian, teardown)
-def test_bc_multivariate_gaussian_predict_log_proba_parallel():
+def test_bc_multivariate_gaussian_predict_log_proba_parallel(multivariate_gaussian):
+	model, X, y, X_nan = multivariate_gaussian
 	y_hat = model.predict_log_proba(X, n_jobs=2)
 	y = [[ -1.48842547e-02,  -4.21488425e+00],
 		 [ -4.37487950e-01,  -1.03748795e+00],
@@ -250,8 +232,8 @@
 	assert_array_almost_equal(y, y_hat)
 
 
-@with_setup(setup_multivariate_mixed, teardown)
-def test_bc_multivariate_mixed_predict_log_proba_parallel():
+def test_bc_multivariate_mixed_predict_log_proba_parallel(multivariate_mixed):
+	model, X, y, X_nan = multivariate_mixed
 	y_hat = model.predict_log_proba(X, n_jobs=2)
 	y = [[ -5.03107596e-01,  -9.27980626e-01],
 		 [ -1.86355320e-01,  -1.77183117e+00],
@@ -267,8 +249,8 @@
 	assert_array_almost_equal(y, y_hat)
 
 
-@with_setup(setup_multivariate_gaussian, teardown)
-def test_bc_multivariate_gaussian_predict_proba():
+def test_bc_multivariate_gaussian_predict_proba(multivariate_gaussian):
+	model, X, y, X_nan = multivariate_gaussian
 	y_hat = model.predict_proba(X)
 	y =	[[  9.85225968e-01,   1.47740317e-02],
 		 [  6.45656306e-01,   3.54343694e-01],
@@ -284,8 +266,8 @@
 	assert_array_almost_equal(y, y_hat)
 
 
-@with_setup(setup_multivariate_mixed, teardown)
-def test_bc_multivariate_mixed_predict_proba():
+def test_bc_multivariate_mixed_predict_proba(multivariate_mixed):
+	model, X, y, X_nan = multivariate_mixed
 	y_hat = model.predict_proba(X)
 	y = [[ 0.60464873,  0.39535127],
 		 [ 0.82997863,  0.17002137],
@@ -301,8 +283,8 @@
 	assert_array_almost_equal(y, y_hat)
 
 
-@with_setup(setup_multivariate_gaussian, teardown)
-def test_bc_multivariate_gaussian_nan_predict_proba():
+def test_bc_multivariate_gaussian_nan_predict_proba(multivariate_gaussian):
+	model, X, y, X_nan = multivariate_gaussian
 	y_hat = model.predict_proba(X_nan)
 	y = [[  9.60834277e-01,   3.91657228e-02],
 		 [  3.10025519e-01,   6.89974481e-01],
@@ -318,8 +300,8 @@
 	assert_array_almost_equal(y, y_hat)
 
 
-@with_setup(setup_multivariate_mixed, teardown)
-def test_bc_multivariate_mixed_nan_predict_proba():
+def test_bc_multivariate_mixed_nan_predict_proba(multivariate_mixed):
+	model, X, y, X_nan = multivariate_mixed
 	y_hat = model.predict_proba(X_nan)
 	y = [[  6.99086440e-01,   3.00913560e-01],
 		 [  2.98988163e-01,   7.01011837e-01],
@@ -335,8 +317,8 @@
 	assert_array_almost_equal(y, y_hat)
 
 
-@with_setup(setup_multivariate_gaussian, teardown)
-def test_bc_multivariate_gaussian_predict_proba_parallel():
+def test_bc_multivariate_gaussian_predict_proba_parallel(multivariate_gaussian):
+	model, X, y, X_nan = multivariate_gaussian
 	y_hat = model.predict_proba(X, n_jobs=2)
 	y = [[  9.85225968e-01,   1.47740317e-02],
 		 [  6.45656306e-01,   3.54343694e-01],
@@ -352,8 +334,8 @@
 	assert_array_almost_equal(y, y_hat)
 
 
-@with_setup(setup_multivariate_mixed, teardown)
-def test_bc_multivariate_mixed_predict_proba_parallel():
+def test_bc_multivariate_mixed_predict_proba_parallel(multivariate_mixed):
+	model, X, y, X_nan = multivariate_mixed
 	y_hat = model.predict_proba(X, n_jobs=2)
 	y = [[ 0.60464873,  0.39535127],
 		 [ 0.82997863,  0.17002137],
@@ -369,56 +351,56 @@
 	assert_array_almost_equal(y, y_hat)
 
 
-@with_setup(setup_multivariate_gaussian, teardown)
-def test_bc_multivariate_gaussian_predict():
+def test_bc_multivariate_gaussian_predict(multivariate_gaussian):
+	model, X, y, X_nan = multivariate_gaussian
 	y_hat = model.predict(X)
 	y = [0, 0, 1, 1, 1, 1, 1, 0, 0, 0]
 
 	assert_array_almost_equal(y, y_hat)
 
 
-@with_setup(setup_multivariate_mixed, teardown)
-def test_bc_multivariate_mixed_predict():
+def test_bc_multivariate_mixed_predict(multivariate_mixed):
+	model, X, y, X_nan = multivariate_mixed
 	y_hat = model.predict(X)
 	y = [0, 0, 0, 1, 1, 0, 0, 0, 0, 0]
 
 	assert_array_almost_equal(y, y_hat)
 
 
-@with_setup(setup_multivariate_gaussian, teardown)
-def test_bc_multivariate_gaussian_nan_predict():
+def test_bc_multivariate_gaussian_nan_predict(multivariate_gaussian):
+	model, X, y, X_nan = multivariate_gaussian
 	y_hat = model.predict(X_nan)
 	y = [0, 1, 1, 0, 1, 1, 1, 0, 0, 0]
 
 	assert_array_almost_equal(y, y_hat)
 
 
-@with_setup(setup_multivariate_mixed, teardown)
-def test_bc_multivariate_mixed_nan_predict():
+def test_bc_multivariate_mixed_nan_predict(multivariate_mixed):
+	model, X, y, X_nan = multivariate_mixed
 	y_hat = model.predict(X_nan)
 	y = [0, 1, 0, 0, 1, 0, 1, 0, 0, 1]
 
 	assert_array_almost_equal(y, y_hat)
 
 
-@with_setup(setup_multivariate_gaussian, teardown)
-def test_bc_multivariate_gaussian_predict_parallel():
+def test_bc_multivariate_gaussian_predict_parallel(multivariate_gaussian):
+	model, X, y, X_nan = multivariate_gaussian
 	y_hat = model.predict(X, n_jobs=2)
 	y = [0, 0, 1, 1, 1, 1, 1, 0, 0, 0]
 
 	assert_array_almost_equal(y, y_hat)
 
 
-@with_setup(setup_multivariate_mixed, teardown)
-def test_bc_multivariate_mixed_predict_parallel():
+def test_bc_multivariate_mixed_predict_parallel(multivariate_mixed):
+	model, X, y, X_nan = multivariate_mixed
 	y_hat = model.predict(X, n_jobs=2)
 	y = [0, 0, 0, 1, 1, 0, 0, 0, 0, 0]
 
 	assert_array_almost_equal(y, y_hat)
 
 
-@with_setup(setup_multivariate_gaussian, teardown)
-def test_bc_multivariate_gaussian_fit_parallel():
+def test_bc_multivariate_gaussian_fit_parallel(multivariate_gaussian):
+	model, X, y, X_nan = multivariate_gaussian
 	model.fit(X, y, n_jobs=2)
 
 	mu1 = model.distributions[0].parameters[0]
@@ -442,8 +424,8 @@
 	assert_array_almost_equal(cov2, cov2_t)
 
 
-@with_setup(setup_multivariate_mixed, teardown)
-def test_bc_multivariate_mixed_fit_parallel():
+def test_bc_multivariate_mixed_fit_parallel(multivariate_mixed):
+	model, X, y, X_nan = multivariate_mixed
 	model.fit(X, y, n_jobs=2)
 
 	mu1 = model.distributions[0].parameters[0]
@@ -464,8 +446,8 @@
 	assert_array_almost_equal(d23.parameters, [2.625])
 
 
-@with_setup(setup_multivariate_gaussian, teardown)
-def test_bc_multivariate_gaussian_from_samples():
+def test_bc_multivariate_gaussian_from_samples(multivariate_gaussian):
+	model, X, y, X_nan = multivariate_gaussian
 	model = BayesClassifier.from_samples(MultivariateGaussianDistribution, X, y)
 
 	mu1 = model.distributions[0].parameters[0]
@@ -488,68 +470,68 @@
 	assert_array_almost_equal(cov2, cov2_t)
 
 
-@with_setup(setup_multivariate_gaussian, teardown)
-def test_bc_multivariate_gaussian_pickle():
+def test_bc_multivariate_gaussian_pickle(multivariate_gaussian):
+	model, X, y, X_nan = multivariate_gaussian
 	model2 = pickle.loads(pickle.dumps(model))
 
-	assert_true(isinstance(model2, BayesClassifier))
-	assert_true(isinstance(model2.distributions[0], MultivariateGaussianDistribution))
-	assert_true(isinstance(model2.distributions[1], MultivariateGaussianDistribution))
+	assert isinstance(model2, BayesClassifier)
+	assert isinstance(model2.distributions[0], MultivariateGaussianDistribution)
+	assert isinstance(model2.distributions[1], MultivariateGaussianDistribution)
 	assert_array_almost_equal(model.weights, model2.weights)
 
 
-@with_setup(setup_multivariate_mixed, teardown)
-def test_bc_multivariate_mixed_pickle():
+def test_bc_multivariate_mixed_pickle(multivariate_mixed):
+	model, X, y, X_nan = multivariate_mixed
 	model2 = pickle.loads(pickle.dumps(model))
 
-	assert_true(isinstance(model2, BayesClassifier))
-	assert_true(isinstance(model2.distributions[0], MultivariateGaussianDistribution))
-	assert_true(isinstance(model2.distributions[1], IndependentComponentsDistribution))
+	assert isinstance(model2, BayesClassifier)
+	assert isinstance(model2.distributions[0], MultivariateGaussianDistribution)
+	assert isinstance(model2.distributions[1], IndependentComponentsDistribution)
 	assert_array_almost_equal(model.weights, model2.weights)
 
 
-@with_setup(setup_multivariate_gaussian, teardown)
-def test_bc_multivariate_gaussian_to_json():
+def test_bc_multivariate_gaussian_to_json(multivariate_gaussian):
+	model, X, y, X_nan = multivariate_gaussian
 	model2 = BayesClassifier.from_json(model.to_json())
 
-	assert_true(isinstance(model2, BayesClassifier))
-	assert_true(isinstance(model2.distributions[0], MultivariateGaussianDistribution))
-	assert_true(isinstance(model2.distributions[1], MultivariateGaussianDistribution))
+	assert isinstance(model2, BayesClassifier)
+	assert isinstance(model2.distributions[0], MultivariateGaussianDistribution)
+	assert isinstance(model2.distributions[1], MultivariateGaussianDistribution)
 	assert_array_almost_equal(model.weights, model2.weights)
 
 
-@with_setup(setup_multivariate_mixed, teardown)
-def test_bc_multivariate_mixed_to_json():
+def test_bc_multivariate_mixed_to_json(multivariate_mixed):
+	model, X, y, X_nan = multivariate_mixed
 	model2 = BayesClassifier.from_json(model.to_json())
 
-	assert_true(isinstance(model2, BayesClassifier))
-	assert_true(isinstance(model2.distributions[0], MultivariateGaussianDistribution))
-	assert_true(isinstance(model2.distributions[1], IndependentComponentsDistribution))
+	assert isinstance(model2, BayesClassifier)
+	assert isinstance(model2.distributions[0], MultivariateGaussianDistribution)
+	assert isinstance(model2.distributions[1], IndependentComponentsDistribution)
 	assert_array_almost_equal(model.weights, model2.weights)
 
 
-@with_setup(setup_multivariate_gaussian, teardown)
-def test_bc_multivariate_gaussian_robust_from_json():
+def test_bc_multivariate_gaussian_robust_from_json(multivariate_gaussian):
+	model, X, y, X_nan = multivariate_gaussian
 	model2 = from_json(model.to_json())
 
-	assert_true(isinstance(model2, BayesClassifier))
-	assert_true(isinstance(model2.distributions[0], MultivariateGaussianDistribution))
-	assert_true(isinstance(model2.distributions[1], MultivariateGaussianDistribution))
+	assert isinstance(model2, BayesClassifier)
+	assert isinstance(model2.distributions[0], MultivariateGaussianDistribution)
+	assert isinstance(model2.distributions[1], MultivariateGaussianDistribution)
 	assert_array_almost_equal(model.weights, model2.weights)
 
 
-@with_setup(setup_multivariate_mixed, teardown)
-def test_bc_multivariate_mixed_robust_from_json():
+def test_bc_multivariate_mixed_robust_from_json(multivariate_mixed):
+	model, X, y, X_nan = multivariate_mixed
 	model2 = from_json(model.to_json())
 
-	assert_true(isinstance(model2, BayesClassifier))
-	assert_true(isinstance(model2.distributions[0], MultivariateGaussianDistribution))
-	assert_true(isinstance(model2.distributions[1], IndependentComponentsDistribution))
+	assert isinstance(model2, BayesClassifier)
+	assert isinstance(model2.distributions[0], MultivariateGaussianDistribution)
+	assert isinstance(model2.distributions[1], IndependentComponentsDistribution)
 	assert_array_almost_equal(model.weights, model2.weights)
 
 
-@with_setup(setup_hmm, teardown)
-def test_model():
+def test_model(hmm):
+	model, hmm1, hmm2, hmm3 = hmm
 	assert_almost_equal(hmm1.log_probability(list('H')), -0.2231435513142097 )
 	assert_almost_equal(hmm1.log_probability(list('T')), -1.6094379124341003 )
 	assert_almost_equal(hmm1.log_probability(list('HHHH')), -0.8925742052568388 )
@@ -570,169 +552,174 @@
 	assert_almost_equal(hmm3.log_probability(list('THTHTHTHTHTH')), -8.883630243546788)
 	assert_almost_equal(hmm3.log_probability(list('THTHHHHHTHTH')), -7.645551826734343)
 
-	assert_equal(model.d, 1)
+	assert model.d == 1
+
+
+def test_hmm_log_proba(hmm):
+    model, hmm1, hmm2, hmm3 = hmm
+    logs = []
+    seqs = [list('H'), list('THHH'), list('TTTT'), list('THTHTHTHTHTH'),
+            list('THTHHHHHTHTH')]
+
+    for seq in seqs:
+        logs.append(model.predict_log_proba(numpy.array([seq]))[0])
 
+    assert_almost_equal(logs[0][0], -0.89097292388986515)
+    assert_almost_equal(logs[0][1], -1.3609765531356006)
+    assert_almost_equal(logs[0][2], -1.0986122886681096)
 
-@with_setup(setup_hmm, teardown)
-def test_hmm_log_proba():
-	logs = []
-	seqs = [list('H'), list('THHH'), list('TTTT'), list('THTHTHTHTHTH'), 
-		list('THTHHHHHTHTH')]
+    assert_almost_equal(logs[1][0], -0.93570553121744293)
+    assert_almost_equal(logs[1][1], -1.429425687080494)
+    assert_almost_equal(logs[1][2], -0.9990078376167526)
 
-	for seq in seqs:
-		logs.append(model.predict_log_proba(numpy.array([seq]))[0])
+    assert_almost_equal(logs[2][0], -3.9007882563128864)
+    assert_almost_equal(logs[2][1], -0.23562532881626597)
+    assert_almost_equal(logs[2][2], -1.6623251045711958)
 
-	assert_almost_equal(logs[0][0], -0.89097292388986515)
-	assert_almost_equal(logs[0][1], -1.3609765531356006)
-	assert_almost_equal(logs[0][2], -1.0986122886681096)
+    assert_almost_equal(logs[3][0], -3.1703366478831185)
+    assert_almost_equal(logs[3][1], -0.49261403211260379)
+    assert_almost_equal(logs[3][2], -1.058478108940049)
 
-	assert_almost_equal(logs[1][0], -0.93570553121744293)
-	assert_almost_equal(logs[1][1], -1.429425687080494)
-	assert_almost_equal(logs[1][2], -0.9990078376167526)
+    assert_almost_equal(logs[4][0], -1.3058441172130273)
+    assert_almost_equal(logs[4][1], -1.4007102236822906)
+    assert_almost_equal(logs[4][2], -0.7284958836972919)
 
-	assert_almost_equal(logs[2][0], -3.9007882563128864)
-	assert_almost_equal(logs[2][1], -0.23562532881626597)
-	assert_almost_equal(logs[2][2], -1.6623251045711958)
 
-	assert_almost_equal(logs[3][0], -3.1703366478831185)
-	assert_almost_equal(logs[3][1], -0.49261403211260379)
-	assert_almost_equal(logs[3][2], -1.058478108940049)
+def test_hmm_proba(hmm):
+    model, hmm1, hmm2, hmm3 = hmm
+    probs = []
+    seqs = [list('H'), list('THHH'), list('TTTT'), list('THTHTHTHTHTH'),
+            list('THTHHHHHTHTH')]
 
-	assert_almost_equal(logs[4][0], -1.3058441172130273)
-	assert_almost_equal(logs[4][1], -1.4007102236822906)
-	assert_almost_equal(logs[4][2], -0.7284958836972919)
+    for seq in seqs:
+        probs.append(model.predict_proba(numpy.array([seq]))[0])
 
+    assert_almost_equal(probs[0][0], 0.41025641025641024)
+    assert_almost_equal(probs[0][1], 0.25641025641025639)
+    assert_almost_equal(probs[0][2], 0.33333333333333331)
 
-@with_setup(setup_hmm, teardown)
-def test_hmm_proba():
-	probs = []
-	seqs = [list('H'), list('THHH'), list('TTTT'), list('THTHTHTHTHTH'), 
-		list('THTHHHHHTHTH')]
+    assert_almost_equal(probs[1][0], 0.39230898163446098)
+    assert_almost_equal(probs[1][1], 0.23944639992337707)
+    assert_almost_equal(probs[1][2], 0.36824461844216183)
 
-	for seq in seqs:
-		probs.append(model.predict_proba(numpy.array([seq]))[0])
+    assert_almost_equal(probs[2][0], 0.020225961918306088)
+    assert_almost_equal(probs[2][1], 0.79007663743383105)
+    assert_almost_equal(probs[2][2], 0.18969740064786292)
 
-	assert_almost_equal(probs[0][0], 0.41025641025641024)
-	assert_almost_equal(probs[0][1], 0.25641025641025639)
-	assert_almost_equal(probs[0][2], 0.33333333333333331)
+    assert_almost_equal(probs[3][0], 0.041989459861032523)
+    assert_almost_equal(probs[3][1], 0.61102706038265642)
+    assert_almost_equal(probs[3][2], 0.346983479756311)
 
-	assert_almost_equal(probs[1][0], 0.39230898163446098)
-	assert_almost_equal(probs[1][1], 0.23944639992337707)
-	assert_almost_equal(probs[1][2], 0.36824461844216183)
+    assert_almost_equal(probs[4][0], 0.27094373022369794)
+    assert_almost_equal(probs[4][1], 0.24642188711704707)
+    assert_almost_equal(probs[4][2], 0.48263438265925512)
 
-	assert_almost_equal(probs[2][0], 0.020225961918306088)
-	assert_almost_equal(probs[2][1], 0.79007663743383105)
-	assert_almost_equal(probs[2][2], 0.18969740064786292)
 
-	assert_almost_equal(probs[3][0], 0.041989459861032523)
-	assert_almost_equal(probs[3][1], 0.61102706038265642)
-	assert_almost_equal(probs[3][2], 0.346983479756311)
+def test_hmm_prediction(hmm):
+    model, hmm1, hmm2, hmm3 = hmm
+    predicts = []
+    seqs = [list('H'), list('THHH'), list('TTTT'), list('THTHTHTHTHTH'),
+            list('THTHHHHHTHTH')]
 
-	assert_almost_equal(probs[4][0], 0.27094373022369794)
-	assert_almost_equal(probs[4][1], 0.24642188711704707)
-	assert_almost_equal(probs[4][2], 0.48263438265925512)
+    for seq in seqs:
+        predicts.append(model.predict(numpy.array([seq]))[0])
 
+    assert predicts[0] == 0
+    assert predicts[1] == 0
+    assert predicts[2] == 1
+    assert predicts[3] == 1
+    assert predicts[4] == 2
 
-@with_setup(setup_hmm, teardown)
-def test_hmm_prediction():
-	predicts = []
-	seqs = [list('H'), list('THHH'), list('TTTT'), list('THTHTHTHTHTH'), 
-		list('THTHHHHHTHTH')]
 
-	for seq in seqs:
-		predicts.append(model.predict(numpy.array([seq]))[0])
+def test_io_log_probability(multivariate_gaussian):
+    model, X, y, X_nan = multivariate_gaussian
+    X2 = DataGenerator(X)
+    X3 = DataFrameGenerator(pandas.DataFrame(X))
 
-	assert_equal(predicts[0], 0)
-	assert_equal(predicts[1], 0)
-	assert_equal(predicts[2], 1)
-	assert_equal(predicts[3], 1)
-	assert_equal(predicts[4], 2)
+    logp1 = model.log_probability(X)
+    logp2 = model.log_probability(X2)
+    logp3 = model.log_probability(X3)
 
-@with_setup(setup_multivariate_gaussian, teardown)
-def test_io_log_probability():
-	X2 = DataGenerator(X)
-	X3 = DataFrameGenerator(pandas.DataFrame(X))
+    assert_array_almost_equal(logp1, logp2)
+    assert_array_almost_equal(logp1, logp3)
 
-	logp1 = model.log_probability(X)
-	logp2 = model.log_probability(X2)
-	logp3 = model.log_probability(X3)
 
-	assert_array_almost_equal(logp1, logp2)
-	assert_array_almost_equal(logp1, logp3)
+def test_io_predict(multivariate_gaussian):
+    model, X, y, X_nan = multivariate_gaussian
+    X2 = DataGenerator(X)
+    X3 = DataFrameGenerator(pandas.DataFrame(X))
 
-@with_setup(setup_multivariate_gaussian, teardown)
-def test_io_predict():
-	X2 = DataGenerator(X)
-	X3 = DataFrameGenerator(pandas.DataFrame(X))
+    y_hat1 = model.predict(X)
+    y_hat2 = model.predict(X2)
+    y_hat3 = model.predict(X3)
 
-	y_hat1 = model.predict(X)
-	y_hat2 = model.predict(X2)
-	y_hat3 = model.predict(X3)
+    assert_array_almost_equal(y_hat1, y_hat2)
+    assert_array_almost_equal(y_hat1, y_hat3)
 
-	assert_array_almost_equal(y_hat1, y_hat2)
-	assert_array_almost_equal(y_hat1, y_hat3)
+def test_io_predict_proba(multivariate_gaussian):
+    model, X, y, X_nan = multivariate_gaussian
+    X2 = DataGenerator(X)
+    X3 = DataFrameGenerator(pandas.DataFrame(X))
 
-@with_setup(setup_multivariate_gaussian, teardown)
-def test_io_predict_proba():
-	X2 = DataGenerator(X)
-	X3 = DataFrameGenerator(pandas.DataFrame(X))
+    y_hat1 = model.predict_proba(X)
+    y_hat2 = model.predict_proba(X2)
+    y_hat3 = model.predict_proba(X3)
 
-	y_hat1 = model.predict_proba(X)
-	y_hat2 = model.predict_proba(X2)
-	y_hat3 = model.predict_proba(X3)
+    assert_array_almost_equal(y_hat1, y_hat2)
+    assert_array_almost_equal(y_hat1, y_hat3)
 
-	assert_array_almost_equal(y_hat1, y_hat2)
-	assert_array_almost_equal(y_hat1, y_hat3)
 
-@with_setup(setup_multivariate_gaussian, teardown)
-def test_io_predict_log_proba():
-	X2 = DataGenerator(X)
-	X3 = DataFrameGenerator(pandas.DataFrame(X))
+def test_io_predict_log_proba(multivariate_gaussian):
+    model, X, y, X_nan = multivariate_gaussian
+    X2 = DataGenerator(X)
+    X3 = DataFrameGenerator(pandas.DataFrame(X))
 
-	y_hat1 = model.predict_log_proba(X)
-	y_hat2 = model.predict_log_proba(X2)
-	y_hat3 = model.predict_log_proba(X3)
+    y_hat1 = model.predict_log_proba(X)
+    y_hat2 = model.predict_log_proba(X2)
+    y_hat3 = model.predict_log_proba(X3)
+
+    assert_array_almost_equal(y_hat1, y_hat2)
+    assert_array_almost_equal(y_hat1, y_hat3)
 
-	assert_array_almost_equal(y_hat1, y_hat2)
-	assert_array_almost_equal(y_hat1, y_hat3)
 
 def test_io_fit():
-	X = numpy.random.randn(100, 5) + 0.5
-	weights = numpy.abs(numpy.random.randn(100))
-	y = numpy.random.randint(2, size=100)
-	data_generator = DataGenerator(X, weights, y)
-
-	mu1 = numpy.array([0, 0, 0, 0, 0])
-	mu2 = numpy.array([1, 1, 1, 1, 1])
-	cov = numpy.eye(5)
-
-	d1 = MultivariateGaussianDistribution(mu1, cov)
-	d2 = MultivariateGaussianDistribution(mu2, cov)
-	bc1 = BayesClassifier([d1, d2])
-	bc1.fit(X, y, weights)
-
-	d1 = MultivariateGaussianDistribution(mu1, cov)
-	d2 = MultivariateGaussianDistribution(mu2, cov)
-	bc2 = BayesClassifier([d1, d2])
-	bc2.fit(data_generator)
+    X = numpy.random.randn(100, 5) + 0.5
+    weights = numpy.abs(numpy.random.randn(100))
+    y = numpy.random.randint(2, size=100)
+    data_generator = DataGenerator(X, weights, y)
+
+    mu1 = numpy.array([0, 0, 0, 0, 0])
+    mu2 = numpy.array([1, 1, 1, 1, 1])
+    cov = numpy.eye(5)
+
+    d1 = MultivariateGaussianDistribution(mu1, cov)
+    d2 = MultivariateGaussianDistribution(mu2, cov)
+    bc1 = BayesClassifier([d1, d2])
+    bc1.fit(X, y, weights)
+
+    d1 = MultivariateGaussianDistribution(mu1, cov)
+    d2 = MultivariateGaussianDistribution(mu2, cov)
+    bc2 = BayesClassifier([d1, d2])
+    bc2.fit(data_generator)
+
+    logp1 = bc1.log_probability(X)
+    logp2 = bc2.log_probability(X)
 
-	logp1 = bc1.log_probability(X)
-	logp2 = bc2.log_probability(X)
+    assert_array_almost_equal(logp1, logp2)
 
-	assert_array_almost_equal(logp1, logp2)
 
 def test_io_from_samples():
-	X = numpy.random.randn(100, 5) + 0.5
-	weights = numpy.abs(numpy.random.randn(100))
-	y = numpy.random.randint(2, size=100)
-	data_generator = DataGenerator(X, weights, y)
+    X = numpy.random.randn(100, 5) + 0.5
+    weights = numpy.abs(numpy.random.randn(100))
+    y = numpy.random.randint(2, size=100)
+    data_generator = DataGenerator(X, weights, y)
 
-	d = MultivariateGaussianDistribution
+    d = MultivariateGaussianDistribution
 
-	bc1 = BayesClassifier.from_samples(d, X=X, y=y, weights=weights)
-	bc2 = BayesClassifier.from_samples(d, X=data_generator)
+    bc1 = BayesClassifier.from_samples(d, X=X, y=y, weights=weights)
+    bc2 = BayesClassifier.from_samples(d, X=data_generator)
 
-	logp1 = bc1.log_probability(X)
-	logp2 = bc2.log_probability(X)
+    logp1 = bc1.log_probability(X)
+    logp2 = bc2.log_probability(X)
 
-	assert_array_almost_equal(logp1, logp2)
\ No newline at end of file
+    assert_array_almost_equal(logp1, logp2)
--- python-pomegranate.orig/tests/test_bayesian_network.py
+++ python-pomegranate/tests/test_bayesian_network.py
@@ -17,11 +17,7 @@
 from pomegranate.io import DataGenerator
 from pomegranate.io import DataFrameGenerator
 
-from .tools import with_setup
-from .tools import assert_equal
-from .tools import assert_not_equal
-from .tools import assert_raises
-from .tools import assert_almost_equal
+from .assert_tools import assert_almost_equal
 
 from networkx import DiGraph
 
@@ -29,10 +25,10 @@
 from numpy.testing import assert_array_almost_equal
 
 import pandas
-import random, numpy
-import sys
+import random
+import numpy
+import pytest
 
-nan = numpy.nan
 numpy.random.seed(1)
 
 datasets = [numpy.random.randint(2, size=(10, 4)),
@@ -51,9 +47,10 @@
 
     datasets_nan.append(X)
 
-def setup_monty():
+
+@pytest.fixture
+def monty():
     # Build a model of the Monty Hall Problem
-    global monty_network, monty_index, prize_index, guest_index
 
     random.seed(0)
 
@@ -108,10 +105,12 @@
     prize_index = monty_network.states.index(s2)
     guest_index = monty_network.states.index(s1)
 
+    return monty_network, monty_index, prize_index, guest_index
+
 
-def setup_titanic():
+@pytest.fixture
+def titanic():
     # Build a model of the titanic disaster
-    global titanic_network, passenger, gender, tclass
 
     # Passengers on the Titanic either survive or perish
     passenger = DiscreteDistribution({'survive': 0.6, 'perish': 0.4})
@@ -149,14 +148,14 @@
     titanic_network.add_edge(s1, s2)
     titanic_network.add_edge(s1, s3)
     titanic_network.bake()
+    return titanic_network, passenger, gender, tclass
 
 
-def setup_large_monty():
+@pytest.fixture
+def large_monty():
     # Build the huge monty hall large_monty_network. This is an example I made
     # up with which may not exactly flow logically, but tests a varied type of
     # tables ensures heterogeneous types of data work together.
-    global large_monty_network, large_monty_friend, large_monty_guest, large_monty
-    global large_monty_remaining, large_monty_randomize, large_monty_prize
 
     # large_monty_Friend
     large_monty_friend = DiscreteDistribution({True: 0.5, False: 0.5})
@@ -248,44 +247,48 @@
     large_monty_network.add_transition(s0, s2)
     large_monty_network.bake()
 
-def setup_random_mixed():
+    return (large_monty_network, large_monty_friend, large_monty_guest,
+            large_monty, large_monty_remaining, large_monty_randomize,
+            large_monty_prize)
+
+
+@pytest.fixture
+def random_mixed():
     numpy.random.seed(0)
-    global X
     X = numpy.array([
         numpy.random.choice([True, False], size=50),
         numpy.random.choice(['A', 'B'], size=50),
         numpy.random.choice(2, size=50)
     ], dtype=object).T.copy()
 
-    global weights
     weights = numpy.abs(numpy.random.randn(50))
 
-    global data_generator
     data_generator = DataGenerator(X, weights)
 
-    global model
     model = BayesianNetwork.from_samples(X)
+    return X, weights, data_generator, model
 
-def teardown():
-    pass
 
-
-@with_setup(setup_monty, teardown)
-def test_check_input_dict():
+def test_check_input_dict(monty):
+    monty_network, monty_index, prize_index, guest_index = monty
     obs = {'guest' : 'A'}
     _check_input(obs, monty_network)
 
     obs = {'guest' : 'NaN'}
-    assert_raises(ValueError, _check_input, obs, monty_network)
+    with pytest.raises(ValueError):
+        _check_input(obs, monty_network)
 
     obs = {'guest' : None}
-    assert_raises(ValueError, _check_input, obs, monty_network)
+    with pytest.raises(ValueError):
+        _check_input(obs, monty_network)
 
     obs = {'guest' : numpy.nan}
-    assert_raises(ValueError, _check_input, obs, monty_network)
+    with pytest.raises(ValueError):
+        _check_input(obs, monty_network)
 
     obs = {'guest' : 'NaN', 'prize' : 'B'}
-    assert_raises(ValueError, _check_input, obs, monty_network)
+    with pytest.raises(ValueError):
+        _check_input(obs, monty_network)
 
     obs = {'guest' : 'A', 'prize' : 'C'}
     _check_input(obs, monty_network)
@@ -298,25 +301,30 @@
     _check_input(obs, monty_network)
 
     obs = {'hello' : 'A', 'prize' : 'B'}
-    assert_raises(ValueError, _check_input, obs, monty_network)
+    with pytest.raises(ValueError):
+        _check_input(obs, monty_network)
 
 
-@with_setup(setup_monty, teardown)
-def test_check_input_list_of_dicts():
+def test_check_input_list_of_dicts(monty):
+    monty_network, monty_index, prize_index, guest_index = monty
     obs = {'guest' : 'A'}
     _check_input([obs], monty_network)
 
     obs = {'guest' : 'NaN'}
-    assert_raises(ValueError, _check_input, [obs], monty_network)
+    with pytest.raises(ValueError):
+        _check_input([obs], monty_network)
 
     obs = {'guest' : None}
-    assert_raises(ValueError, _check_input, [obs], monty_network)
+    with pytest.raises(ValueError):
+        _check_input([obs], monty_network)
 
     obs = {'guest' : numpy.nan}
-    assert_raises(ValueError, _check_input, [obs], monty_network)
+    with pytest.raises(ValueError):
+        _check_input([obs], monty_network)
 
     obs = {'guest' : 'NaN', 'prize' : 'B'}
-    assert_raises(ValueError, _check_input, [obs], monty_network)
+    with pytest.raises(ValueError):
+        _check_input([obs], monty_network)
 
     obs = {'guest' : 'A', 'prize' : 'C'}
     _check_input([obs], monty_network)
@@ -329,7 +337,8 @@
     _check_input([obs], monty_network)
 
     obs = {'hello' : 'A', 'prize' : 'B'}
-    assert_raises(ValueError, _check_input, [obs], monty_network)
+    with pytest.raises(ValueError):
+        _check_input([obs], monty_network)
 
     obs = [{'guest' : 'A'}, {'guest' : 'A', 'prize' : 'C'},
         {'guest' : 'A', 'prize' : 'C', 'monty' : 'C'},
@@ -338,11 +347,12 @@
     _check_input(obs, monty_network)
 
     obs.append({'guest' : 'NaN', 'prize' : 'B'})
-    assert_raises(ValueError, _check_input, obs, monty_network)
+    with pytest.raises(ValueError):
+        _check_input(obs, monty_network)
 
 
-@with_setup(setup_monty, teardown)
-def test_check_input_list_of_lists():
+def test_check_input_list_of_lists(monty):
+    monty_network, monty_index, prize_index, guest_index = monty
     obs = ['A', None, None]
     _check_input([obs], monty_network)
 
@@ -359,16 +369,20 @@
     _check_input([obs], monty_network)
 
     obs = numpy.array(['NaN', numpy.nan, numpy.nan])
-    assert_raises(ValueError, _check_input, [obs], monty_network)
+    with pytest.raises(ValueError):
+        _check_input([obs], monty_network)
 
     obs = numpy.array(['A', 'B', 'D'])
-    assert_raises(ValueError, _check_input, [obs], monty_network)
+    with pytest.raises(ValueError):
+        _check_input([obs], monty_network)
 
     obs = ['A']
-    assert_raises(ValueError, _check_input, [obs], monty_network)
+    with pytest.raises(ValueError):
+        _check_input([obs], monty_network)
 
     obs = ['A', 'C', 'E', 'F']
-    assert_raises(ValueError, _check_input, [obs], monty_network)
+    with pytest.raises(ValueError):
+        _check_input([obs], monty_network)
 
     d = DiscreteDistribution({'A': 0.25, 'B': 0.25, 'C': 0.25})
     obs = [d, None, None]
@@ -376,64 +390,70 @@
 
     e = DiscreteDistribution({'A': 0.25, 'B': 0.25, 'D': 0.25})
     obs = [e, None, None]
-    assert_raises(ValueError, _check_input, [obs], monty_network)
+    with pytest.raises(ValueError):
+        _check_input([obs], monty_network)
 
     obs = [['A', None, None], ['A', numpy.nan, numpy.nan], ['A', 'B', 'C'],
         ['A', None, 'C'], [None, 'B', 'C'], [d, None, None]]
     _check_input(obs, monty_network)
 
     obs.append([e, None, None])
-    assert_raises(ValueError, _check_input, obs, monty_network)
+    with pytest.raises(ValueError):
+        _check_input(obs, monty_network)
+
 
+def test_titanic_network(titanic):
+    titanic_network, passenger, gender, tclass = titanic
 
-@with_setup(setup_titanic, teardown)
-def test_titanic_network():
     assert_almost_equal(passenger.log_probability('survive'), numpy.log(0.6))
     assert_almost_equal(passenger.log_probability('survive'), numpy.log(0.6))
 
-    assert_almost_equal(gender.log_probability(('survive', 'male')),   float("-inf"))
+    assert gender.log_probability(('survive', 'male')) == float("-inf")
     assert_almost_equal(gender.log_probability(('survive', 'female')), 0.0)
     assert_almost_equal(gender.log_probability(('perish', 'male')),    0.0)
-    assert_almost_equal(gender.log_probability(('perish', 'female')),  float("-inf"))
+    assert gender.log_probability(('perish', 'female')) == float("-inf")
 
-    assert_almost_equal(tclass.log_probability(('survive', 'first')), float("-inf"))
+    assert tclass.log_probability(('survive', 'first')) == float("-inf")
     assert_almost_equal(tclass.log_probability(('survive', 'second')), 0.0)
-    assert_almost_equal(tclass.log_probability(('survive', 'third')), float("-inf"))
+    assert tclass.log_probability(('survive', 'third')) == float("-inf")
     assert_almost_equal(tclass.log_probability(('perish', 'first')), 0.0)
-    assert_almost_equal(tclass.log_probability(('perish', 'second')), float("-inf"))
-    assert_almost_equal(tclass.log_probability(('perish', 'third')), float("-inf"))
+    assert tclass.log_probability(('perish', 'second')) == float("-inf")
+    assert tclass.log_probability(('perish', 'third')) == float("-inf")
 
 
-@with_setup(setup_titanic, teardown)
-def test_guest_titanic():
+def test_guest_titanic(titanic):
+    titanic_network, passenger, gender, tclass = titanic
     male = titanic_network.predict_proba({'gender': 'male'})
     female = titanic_network.predict_proba({'gender': 'female'})
 
-    assert_equal(female[0].log_probability("survive"), 0.0)
-    assert_equal(female[0].log_probability("perish"), float("-inf"))
+    assert female[0].log_probability("survive") == 0.0
+    assert female[0].log_probability("perish") == float("-inf")
 
-    assert_equal(female[1], 'female')
-    assert_equal(female[2].log_probability("first"), float("-inf"))
-    assert_equal(female[2].log_probability("second"), 0.0)
-    assert_equal(female[2].log_probability("third"), float("-inf"))
-
-    assert_equal(male[0].log_probability("survive"), float("-inf"))
-    assert_equal(male[0].log_probability("perish"), 0.0)
-
-    assert_equal(male[1], 'male')
-
-    assert_equal(male[2].log_probability("first"), 0.0)
-    assert_equal(male[2].log_probability("second"), float("-inf"))
-    assert_equal(male[2].log_probability("third"), float("-inf"))
+    assert female[1] == 'female'
+    assert female[2].log_probability("first") == float("-inf")
+    assert female[2].log_probability("second") == 0.0
+    assert female[2].log_probability("third") == float("-inf")
+
+    assert male[0].log_probability("survive") == float("-inf")
+    assert male[0].log_probability("perish") == 0.0
+
+    assert male[1] == 'male'
+
+    assert male[2].log_probability("first") == 0.0
+    assert male[2].log_probability("second") == float("-inf")
+    assert male[2].log_probability("third") == float("-inf")
 
     titanic_network2 = BayesianNetwork.from_json(titanic_network.to_json())
 
 
-@with_setup(setup_large_monty, teardown)
-def test_large_monty():
+def test_large_monty(large_monty):
+    (large_monty_network, large_monty_friend, large_monty_guest, large_monty,
+     large_monty_remaining, large_monty_randomize, large_monty_prize
+     ) = large_monty
+
     assert_almost_equal(large_monty.log_probability(('A', 'A', 'C')), numpy.log(0.5))
     assert_almost_equal(large_monty.log_probability(('B', 'B', 'C')), numpy.log(0.5))
-    assert_equal(large_monty.log_probability(('C', 'C', 'C')), float("-inf"))
+    assert large_monty.log_probability(('C', 'C', 'C')) == float("-inf")
 
     data = [[True,  'A', 'A', 'C', 1, True],
             [True,  'A', 'A', 'C', 0, True],
@@ -455,8 +475,10 @@
     assert_almost_equal(large_monty.log_probability(('C', 'C', 'C')), numpy.log(0.75))
 
 
-@with_setup(setup_large_monty, teardown)
-def test_large_monty_friend():
+def test_large_monty_friend(large_monty):
+    (large_monty_network, large_monty_friend, large_monty_guest, large_monty,
+     large_monty_remaining, large_monty_randomize, large_monty_prize
+     ) = large_monty
     assert_almost_equal(large_monty_friend.log_probability(True), numpy.log(0.5))
     assert_almost_equal(large_monty_friend.log_probability(False), numpy.log(0.5))
 
@@ -479,8 +501,10 @@
     assert_almost_equal(large_monty_friend.log_probability(False), numpy.log(5. / 12))
 
 
-@with_setup(setup_large_monty, teardown)
-def test_large_monty_remaining():
+def test_large_monty_remaining(large_monty):
+    (large_monty_network, large_monty_friend, large_monty_guest, large_monty,
+     large_monty_remaining, large_monty_randomize, large_monty_prize
+     ) = large_monty
     model = large_monty_remaining
 
     assert_almost_equal(model.log_probability(0), numpy.log(0.1))
@@ -506,8 +530,10 @@
     assert_almost_equal(model.log_probability(1), numpy.log(5. / 12))
     assert_almost_equal(model.log_probability(2), numpy.log(4. / 12))
 
-@with_setup(setup_large_monty, teardown)
-def test_large_monty_network_log_probability():
+def test_large_monty_network_log_probability(large_monty):
+    (large_monty_network, large_monty_friend, large_monty_guest, large_monty,
+     large_monty_remaining, large_monty_randomize, large_monty_prize
+     ) = large_monty
     model = large_monty_network
 
     data = numpy.array([[True,  'A', 'A', 'C', 1, True],
@@ -539,8 +565,10 @@
 
     assert_array_almost_equal(logp2, logp)
 
-@with_setup(setup_large_monty, teardown)
-def test_large_monty_network_log_probability_parallel():
+def test_large_monty_network_log_probability_parallel(large_monty):
+    (large_monty_network, large_monty_friend, large_monty_guest, large_monty,
+     large_monty_remaining, large_monty_randomize, large_monty_prize
+     ) = large_monty
     model = large_monty_network
 
     data = numpy.array([[True,  'A', 'A', 'C', 1, True],
@@ -572,16 +600,18 @@
 
     assert_array_almost_equal(logp2, logp)
 
-@with_setup(setup_large_monty, teardown)
-def test_large_monty_prize():
+def test_large_monty_prize(large_monty):
+    (large_monty_network, large_monty_friend, large_monty_guest, large_monty,
+     large_monty_remaining, large_monty_randomize, large_monty_prize
+     ) = large_monty
     assert_almost_equal(large_monty_prize.log_probability(
         (True,  True,  'A')), numpy.log(0.3))
     assert_almost_equal(large_monty_prize.log_probability(
         (True,  False, 'C')), numpy.log(0.4))
     assert_almost_equal(large_monty_prize.log_probability(
         (False, True,  'B')), numpy.log(0.9))
-    assert_almost_equal(large_monty_prize.log_probability(
-        (False, False, 'A')), float("-inf"))
+    assert large_monty_prize.log_probability(
+        (False, False, 'A')) == float("-inf")
 
     data = [[True,  'A', 'A', 'C', 1, True],
             [True,  'A', 'A', 'C', 0, True],
@@ -600,8 +630,8 @@
 
     assert_almost_equal(large_monty_prize.log_probability(
         (True, True, 'C')), numpy.log(0.5))
-    assert_equal(large_monty_prize.log_probability(
-        (True, True, 'B')), float("-inf"))
+    assert large_monty_prize.log_probability(
+        (True, True, 'B')) == float("-inf")
 
     a = large_monty_prize.log_probability((True, False, 'A'))
     b = large_monty_prize.log_probability((True, False, 'B'))
@@ -610,8 +640,8 @@
     assert_almost_equal(a, b)
     assert_almost_equal(b, c)
 
-    assert_equal(large_monty_prize.log_probability(
-        (False, False, 'C')), float("-inf"))
+    assert large_monty_prize.log_probability(
+        (False, False, 'C')) == float("-inf")
     assert_almost_equal(large_monty_prize.log_probability(
         (False, True, 'C')), numpy.log(2. / 3))
 
@@ -623,8 +653,8 @@
             raise ValueError("{} != {}".format(yd[key], value))
 
 
-@with_setup(setup_monty, teardown)
-def test_guest_monty():
+def test_guest_monty(monty):
+    monty_network, monty_index, prize_index, guest_index = monty
     a = monty_network.predict_proba({'guest': 'A'})
     b = monty_network.predict_proba({'guest': 'B'})
     c = monty_network.predict_proba({'guest': 'C'})
@@ -644,34 +674,34 @@
         {'A': 1. / 2, 'B': 1. / 2, 'C': 0.0}))
 
 
-@with_setup(setup_monty, teardown)
-def test_guest_with_monty():
+def test_guest_with_monty(monty):
+    monty_network, monty_index, prize_index, guest_index = monty
     b = monty_network.predict_proba({'guest': 'A', 'monty': 'B'})
     c = monty_network.predict_proba({'guest': 'A', 'monty': 'C'})
 
-    assert_equal(b[guest_index], 'A')
-    assert_equal(b[monty_index], 'B')
+    assert b[guest_index] == 'A'
+    assert b[monty_index] == 'B'
     assert_discrete_equal(b[prize_index], DiscreteDistribution(
         {'A': 1. / 3, 'B': 0.0, 'C': 2. / 3}))
 
-    assert_equal(c[guest_index], 'A')
-    assert_equal(c[monty_index], 'C')
+    assert c[guest_index] == 'A'
+    assert c[monty_index] == 'C'
     assert_discrete_equal(c[prize_index], DiscreteDistribution(
         {'A': 1. / 3, 'B': 2. / 3, 'C': 0.0}))
 
 
-@with_setup(setup_monty, teardown)
-def test_monty():
+def test_monty(monty):
+    monty_network, monty_index, prize_index, guest_index = monty
     a = monty_network.predict_proba({'monty': 'A'})
 
-    assert_equal(a[monty_index], 'A')
+    assert a[monty_index] == 'A'
     assert_discrete_equal(a[guest_index], a[prize_index])
     assert_discrete_equal(a[guest_index], DiscreteDistribution(
         {'A': 0.0, 'B': 1. / 2, 'C': 1. / 2}))
 
 
-@with_setup(setup_monty, teardown)
-def test_predict():
+def test_predict(monty):
+    monty_network, monty_index, prize_index, guest_index = monty
     obs = [['A', None, 'B'],
            ['A', None, 'C'],
            ['A', 'B', 'C']]
@@ -692,8 +722,8 @@
                          ['A', 'B', 'C']
                        ])
 
-@with_setup(setup_monty, teardown)
-def test_rejection_sampling():
+def test_rejection_sampling(monty):
+    monty_network, monty_index, prize_index, guest_index = monty
     numpy.random.seed(0)
     predictions = monty_network._rejection(n=10,evidences=[{'guest':'A', 'monty':'B'}])
     (unique, counts) = numpy.unique(predictions[:,1], return_counts=True)
@@ -712,8 +742,8 @@
     #                     ['A', 'C', 'B'],
     #                     ['A', 'C', 'B']])
 
-@with_setup(setup_monty, teardown)
-def test_gibbs_sampling():
+def test_gibbs_sampling(monty):
+    monty_network, monty_index, prize_index, guest_index = monty
     # evidences = [['A', None, 'B'],
     #              ['A', None, 'C'],
     #              ['A', 'B', 'C' ]]
@@ -723,8 +753,8 @@
     # need to fix the seed
     assert(abs(counts[0]-340) < 34)
 
-@with_setup(setup_monty, teardown)
-def test_predict_parallel():
+def test_predict_parallel(monty):
+    monty_network, monty_index, prize_index, guest_index = monty
     obs = [['A', None, 'B'],
            ['A', None, 'C'],
            ['A', 'B', 'C']]
@@ -744,8 +774,8 @@
                          ['A', 'B', 'C']
                        ])
 
-@with_setup(setup_monty, teardown)
-def test_predict_datagenerator():
+def test_predict_datagenerator(monty):
+    monty_network, monty_index, prize_index, guest_index = monty
     obs = [['A', None, 'B'],
            ['A', None, 'C'],
            ['A', 'B', 'C']]
@@ -768,8 +798,8 @@
                          ['A', 'B', 'C']
                        ])
 
-@with_setup(setup_monty, teardown)
-def test_numpy_predict():
+def test_numpy_predict(monty):
+    monty_network, monty_index, prize_index, guest_index = monty
     obs = numpy.array([['A', None, 'B'],
                     ['A', None, 'C'],
                     ['A', 'B', 'C']])
@@ -790,8 +820,8 @@
                          ['A', 'B', 'C']
                        ])
 
-@with_setup(setup_monty, teardown)
-def test_numpy_predict_parallel():
+def test_numpy_predict_parallel(monty):
+    monty_network, monty_index, prize_index, guest_index = monty
     obs = numpy.array([['A', None, 'B'],
                     ['A', None, 'C'],
                     ['A', 'B', 'C']])
@@ -812,8 +842,8 @@
                          ['A', 'B', 'C']
                        ])
 
-@with_setup(setup_monty, teardown)
-def test_numpy_predict_datagenerator():
+def test_numpy_predict_datagenerator(monty):
+    monty_network, monty_index, prize_index, guest_index = monty
     obs = numpy.array([['A', None, 'B'],
                     ['A', None, 'C'],
                     ['A', 'B', 'C']])
@@ -837,29 +867,31 @@
                        ])
 
 
-@with_setup(setup_monty, teardown)
-def test_single_dict_predict_proba():
+def test_single_dict_predict_proba(monty):
+    monty_network, monty_index, prize_index, guest_index = monty
     obs = {'guest': 'A',  'monty': 'B'}
     y = DiscreteDistribution({'A': 1./3, 'B': 0., 'C': 2./3})
     y_hat = monty_network.predict_proba(obs)
 
-    assert_equal(y_hat[0], 'A')
-    assert_equal(y_hat[2], 'B')
+    assert y_hat[0] == 'A'
+    assert y_hat[2] == 'B'
     assert_discrete_equal(y_hat[1], y)
 
 
-@with_setup(setup_large_monty, teardown)
-def test_single_dict_large_predict_proba():
+def test_single_dict_large_predict_proba(large_monty):
+    (large_monty_network, large_monty_friend, large_monty_guest, large_monty,
+     large_monty_remaining, large_monty_randomize, large_monty_prize
+     ) = large_monty
     obs = {'large_monty_friend' : True,  'large_monty_guest': 'A',
         'large_monty_prize': 'A', 'large_monty': 'C'}
     y1 = DiscreteDistribution({0: 0.0472, 1: 0.781, 2: 0.17167})
     y2 = DiscreteDistribution({True: 0.8562, False: 0.143776})
     y_hat = large_monty_network.predict_proba(obs)
 
-    assert_equal(y_hat[0], True)
-    assert_equal(y_hat[1], 'A')
-    assert_equal(y_hat[2], 'A')
-    assert_equal(y_hat[3], 'C')
+    assert y_hat[0] == True
+    assert y_hat[1] == 'A'
+    assert y_hat[2] == 'A'
+    assert y_hat[3] == 'C'
     assert_discrete_equal(y_hat[4], y1, 3)
     assert_discrete_equal(y_hat[5], y2, 3)
 
@@ -869,36 +901,38 @@
     y2 = DiscreteDistribution({True: 0.75, False: 0.25})
     y_hat = large_monty_network.predict_proba(obs)
 
-    assert_equal(y_hat[0], True)
-    assert_equal(y_hat[2], 'A')
-    assert_equal(y_hat[3], 'C')
-    assert_equal(y_hat[4], 2)
+    assert y_hat[0] == True
+    assert y_hat[2] == 'A'
+    assert y_hat[3] == 'C'
+    assert y_hat[4] == 2
     assert_discrete_equal(y_hat[1], y1)
     assert_discrete_equal(y_hat[5], y2)
 
 
-@with_setup(setup_monty, teardown)
-def test_list_of_lists_predict_proba():
+def test_list_of_lists_predict_proba(monty):
+    monty_network, monty_index, prize_index, guest_index = monty
     obs = [['A', None, 'B']]
     y = DiscreteDistribution({'A': 1./3, 'B': 0., 'C': 2./3})
     y_hat = monty_network.predict_proba(obs)
 
-    assert_equal(y_hat[0][0], 'A')
-    assert_equal(y_hat[0][2], 'B')
+    assert y_hat[0][0] == 'A'
+    assert y_hat[0][2] == 'B'
     assert_discrete_equal(y_hat[0][1], y)
 
 
-@with_setup(setup_large_monty, teardown)
-def test_list_of_lists_large_predict_proba():
+def test_list_of_lists_large_predict_proba(large_monty):
+    (large_monty_network, large_monty_friend, large_monty_guest, large_monty,
+     large_monty_remaining, large_monty_randomize, large_monty_prize
+     ) = large_monty
     obs = [[True,  'A', 'A', 'C', None, None]]
     y1 = DiscreteDistribution({0: 0.0472, 1: 0.781, 2: 0.17167})
     y2 = DiscreteDistribution({True: 0.8562, False: 0.143776})
     y_hat = large_monty_network.predict_proba(obs)
 
-    assert_equal(y_hat[0][0], True)
-    assert_equal(y_hat[0][1], 'A')
-    assert_equal(y_hat[0][2], 'A')
-    assert_equal(y_hat[0][3], 'C')
+    assert y_hat[0][0] == True
+    assert y_hat[0][1] == 'A'
+    assert y_hat[0][2] == 'A'
+    assert y_hat[0][3] == 'C'
     assert_discrete_equal(y_hat[0][4], y1, 3)
     assert_discrete_equal(y_hat[0][5], y2, 3)
 
@@ -907,37 +941,39 @@
     y2 = DiscreteDistribution({True: 0.75, False: 0.25})
     y_hat = large_monty_network.predict_proba(obs)
 
-    assert_equal(y_hat[0][0], True)
-    assert_equal(y_hat[0][2], 'A')
-    assert_equal(y_hat[0][3], 'C')
-    assert_equal(y_hat[0][4], 2)
+    assert y_hat[0][0] == True
+    assert y_hat[0][2] == 'A'
+    assert y_hat[0][3] == 'C'
+    assert y_hat[0][4] == 2
     assert_discrete_equal(y_hat[0][1], y1)
     assert_discrete_equal(y_hat[0][5], y2)
 
 
-@with_setup(setup_monty, teardown)
-def test_list_of_dicts_predict_proba():
+def test_list_of_dicts_predict_proba(monty):
+    monty_network, monty_index, prize_index, guest_index = monty
     obs = [{'guest': 'A',  'monty': 'B'}]
     y = DiscreteDistribution({'A': 1./3, 'B': 0., 'C': 2./3})
     y_hat = monty_network.predict_proba(obs)
 
-    assert_equal(y_hat[0][0], 'A')
-    assert_equal(y_hat[0][2], 'B')
+    assert y_hat[0][0] == 'A'
+    assert y_hat[0][2] == 'B'
     assert_discrete_equal(y_hat[0][1], y)
 
 
-@with_setup(setup_large_monty, teardown)
-def test_list_of_dicts_large_predict_proba():
+def test_list_of_dicts_large_predict_proba(large_monty):
+    (large_monty_network, large_monty_friend, large_monty_guest, large_monty,
+     large_monty_remaining, large_monty_randomize, large_monty_prize
+     ) = large_monty
     obs = [{'large_monty_friend' : True,  'large_monty_guest': 'A',
         'large_monty_prize': 'A', 'large_monty': 'C'}]
     y1 = DiscreteDistribution({0: 0.0472, 1: 0.781, 2: 0.17167})
     y2 = DiscreteDistribution({True: 0.8562, False: 0.143776})
     y_hat = large_monty_network.predict_proba(obs)
 
-    assert_equal(y_hat[0][0], True)
-    assert_equal(y_hat[0][1], 'A')
-    assert_equal(y_hat[0][2], 'A')
-    assert_equal(y_hat[0][3], 'C')
+    assert y_hat[0][0] == True
+    assert y_hat[0][1] == 'A'
+    assert y_hat[0][2] == 'A'
+    assert y_hat[0][3] == 'C'
     assert_discrete_equal(y_hat[0][4], y1, 3)
     assert_discrete_equal(y_hat[0][5], y2, 3)
 
@@ -947,16 +983,16 @@
     y2 = DiscreteDistribution({True: 0.75, False: 0.25})
     y_hat = large_monty_network.predict_proba(obs)
 
-    assert_equal(y_hat[0][0], True)
-    assert_equal(y_hat[0][2], 'A')
-    assert_equal(y_hat[0][3], 'C')
-    assert_equal(y_hat[0][4], 2)
+    assert y_hat[0][0] == True
+    assert y_hat[0][2] == 'A'
+    assert y_hat[0][3] == 'C'
+    assert y_hat[0][4] == 2
     assert_discrete_equal(y_hat[0][1], y1)
     assert_discrete_equal(y_hat[0][5], y2)
 
 
-@with_setup(setup_monty, teardown)
-def test_list_of_dicts_predict_proba_parallel():
+def test_list_of_dicts_predict_proba_parallel(monty):
+    monty_network, monty_index, prize_index, guest_index = monty
     obs = [{'guest': 'A',  'monty': 'B'},
            {'guest': 'B', 'prize': 'A'},
            {'monty': 'C', 'prize': 'B'},
@@ -964,33 +1000,38 @@
     y = DiscreteDistribution({'A': 1./3, 'B': 0., 'C': 2./3})
     y_hat = monty_network.predict_proba(obs, n_jobs=2)
 
-    assert_equal(y_hat[0][0], 'A')
-    assert_equal(y_hat[0][2], 'B')
+    assert y_hat[0][0] == 'A'
+    assert y_hat[0][2] == 'B'
     assert_discrete_equal(y_hat[0][1], y)
 
-    assert_equal(y_hat[1][0], 'B')
-    assert_equal(y_hat[1][1], 'A')
+    assert y_hat[1][0] == 'B'
+    assert y_hat[1][1] == 'A'
 
-    assert_equal(y_hat[3][2], 'B')
-    assert_equal(y_hat[4][1], 'A')
+    assert y_hat[3][2] == 'B'
+    assert y_hat[4][1] == 'A'
 
 
-@with_setup(setup_monty, teardown)
-def test_raise_error():
+def test_raise_error(monty):
+    monty_network, monty_index, prize_index, guest_index = monty
     obs = [['green', 'cat', None]]
-    assert_raises(ValueError, monty_network.predict, obs)
+    with pytest.raises(ValueError):
+        monty_network.predict(obs)
 
     obs = [['A', 'b', None]]
-    assert_raises(ValueError, monty_network.predict, obs)
+    with pytest.raises(ValueError):
+        monty_network.predict(obs)
 
     obs = [['none', 'B', None]]
-    assert_raises(ValueError, monty_network.predict, obs)
+    with pytest.raises(ValueError):
+        monty_network.predict(obs)
 
     obs = [['NaN', 'B', None]]
-    assert_raises(ValueError, monty_network.predict, obs)
+    with pytest.raises(ValueError):
+        monty_network.predict(obs)
 
     obs = [['A', 'C', 'D']]
-    assert_raises(ValueError, monty_network.predict, obs)
+    with pytest.raises(ValueError):
+        monty_network.predict(obs)
 
 
 def test_exact_structure_learning():
@@ -1014,74 +1055,74 @@
     for X, n_parents in zip(datasets, n_parents):
         model = BayesianNetwork.from_samples(X, algorithm='exact', penalty=0)
         model2 = BayesianNetwork.from_samples(X, algorithm='exact-dp', penalty=0)
-        assert_equal(sum(map(len, model.structure)), n_parents[0])
-        assert_equal(sum(map(len, model2.structure)), n_parents[0])
+        assert sum(map(len, model.structure)) == n_parents[0]
+        assert sum(map(len, model2.structure)) == n_parents[0]
 
         model = BayesianNetwork.from_samples(X, algorithm='exact')
         model2 = BayesianNetwork.from_samples(X, algorithm='exact-dp')
-        assert_equal(sum(map(len, model.structure)), n_parents[1])
-        assert_equal(sum(map(len, model2.structure)), n_parents[1])
+        assert sum(map(len, model.structure)) == n_parents[1]
+        assert sum(map(len, model2.structure)) == n_parents[1]
 
         model = BayesianNetwork.from_samples(X, algorithm='exact', penalty=1)
         model2 = BayesianNetwork.from_samples(X, algorithm='exact-dp', penalty=1)
-        assert_equal(sum(map(len, model.structure)), n_parents[2])
-        assert_equal(sum(map(len, model2.structure)), n_parents[2])
+        assert sum(map(len, model.structure)) == n_parents[2]
+        assert sum(map(len, model2.structure)) == n_parents[2]
 
         model = BayesianNetwork.from_samples(X, algorithm='exact', penalty=100)
         model2 = BayesianNetwork.from_samples(X, algorithm='exact-dp', penalty=100)
-        assert_equal(sum(map(len, model.structure)), 0)
-        assert_equal(sum(map(len, model2.structure)), 0)
+        assert sum(map(len, model.structure)) == 0
+        assert sum(map(len, model2.structure)) == 0
 
 def test_exact_penalized_low_memory_structure_learning():
     n_parents = [(5, 3, 4), (10, 0, 1), (21, 0, 8), (26, 3, 21)]
     for X, n_parents in zip(datasets, n_parents):
         model = BayesianNetwork.from_samples(X, low_memory=True, algorithm='exact', penalty=0)
         model2 = BayesianNetwork.from_samples(X, low_memory=True, algorithm='exact-dp', penalty=0)
-        assert_equal(sum(map(len, model.structure)), n_parents[0])
-        assert_equal(sum(map(len, model2.structure)), n_parents[0])
+        assert sum(map(len, model.structure)) == n_parents[0]
+        assert sum(map(len, model2.structure)) == n_parents[0]
 
         model = BayesianNetwork.from_samples(X, low_memory=True, algorithm='exact')
         model2 = BayesianNetwork.from_samples(X, low_memory=True, algorithm='exact-dp')
-        assert_equal(sum(map(len, model.structure)), n_parents[1])
-        assert_equal(sum(map(len, model2.structure)), n_parents[1])
+        assert sum(map(len, model.structure)) == n_parents[1]
+        assert sum(map(len, model2.structure)) == n_parents[1]
 
         model = BayesianNetwork.from_samples(X, low_memory=True, algorithm='exact', penalty=1)
         model2 = BayesianNetwork.from_samples(X, low_memory=True, algorithm='exact-dp', penalty=1)
-        assert_equal(sum(map(len, model.structure)), n_parents[2])
-        assert_equal(sum(map(len, model2.structure)), n_parents[2])
+        assert sum(map(len, model.structure)) == n_parents[2]
+        assert sum(map(len, model2.structure)) == n_parents[2]
 
         model = BayesianNetwork.from_samples(X, low_memory=True, algorithm='exact', penalty=100)
         model2 = BayesianNetwork.from_samples(X, low_memory=True, algorithm='exact-dp', penalty=100)
-        assert_equal(sum(map(len, model.structure)), 0)
-        assert_equal(sum(map(len, model2.structure)), 0)
+        assert sum(map(len, model.structure)) == 0
+        assert sum(map(len, model2.structure)) == 0
 
 def test_exact_structure_learning_include_edges():
     for X in datasets:
         model = BayesianNetwork.from_samples(X, algorithm='exact', 
             include_edges=[(1, 3)])
-        assert_equal(model.structure[3], (1,))
+        assert model.structure[3] == (1,)
 
         model = BayesianNetwork.from_samples(X, algorithm='exact')
-        assert_not_equal(model.structure[3], (1,))
+        assert model.structure[3] != (1,)
 
 def test_exact_low_memory_structure_learning_include_edges():
     for X in datasets:
         model = BayesianNetwork.from_samples(X, algorithm='exact', 
             low_memory=True, include_edges=[(1, 3)])
-        assert_equal(model.structure[3], (1,))
+        assert model.structure[3] == (1,)
 
         model = BayesianNetwork.from_samples(X, low_memory=True,
             algorithm='exact')
-        assert_not_equal(model.structure[3], (1,))
+        assert model.structure[3] != (1,)
 
 def test_exact_dp_structure_learning_include_edges():
     for X in datasets:
         model = BayesianNetwork.from_samples(X, algorithm='exact-dp', 
             include_edges=[(1, 3)])
-        assert_equal(model.structure[3], (1,))
+        assert model.structure[3] == (1,)
 
         model = BayesianNetwork.from_samples(X, algorithm='exact-dp')
-        assert_not_equal(model.structure[3], (1,))
+        assert model.structure[3] != (1,)
 
 def test_exact_structure_learning_exclude_edges():
     for X in datasets:
@@ -1094,9 +1135,9 @@
         # Learn constrained network
         model = BayesianNetwork.from_samples(X, algorithm='exact', 
             exclude_edges=[(1, d-1), (d-1, d-2)])    
-        assert_not_equal(model.structure[-1], (1,))
-        assert_not_equal(model.structure[-2], (d-1,))
-        assert_equal(model.structure[-2], (1,))
+        assert model.structure[-1] != (1,)
+        assert model.structure[-2] != (d-1,)
+        assert model.structure[-2] == (1,)
 
 def test_exact_low_memory_structure_learning_exclude_edges():
     for X in datasets:
@@ -1109,9 +1150,9 @@
         # Learn constrained network
         model = BayesianNetwork.from_samples(X, algorithm='exact', 
             low_memory=True, exclude_edges=[(1, d-1), (d-1, d-2)])    
-        assert_not_equal(model.structure[-1], (1,))
-        assert_not_equal(model.structure[-2], (d-1,))
-        assert_equal(model.structure[-2], (1,))
+        assert model.structure[-1] != (1,)
+        assert model.structure[-2] != (d-1,)
+        assert model.structure[-2] == (1,)
 
 
 def test_exact_dp_structure_learning_exclude_edges():
@@ -1125,8 +1166,8 @@
         # Learn constrained network
         model = BayesianNetwork.from_samples(X, algorithm='exact-dp', 
             exclude_edges=[(1, d-1), (d-1, d-2)])    
-        assert_not_equal(model.structure[-1], (1,))
-        assert_not_equal(model.structure[-2], (d-1,))
+        assert model.structure[-1] != (1,)
+        assert model.structure[-2] != (d-1,)
 
 def test_constrained_sl_structure_learning_exclude_edges():
     for X in datasets:
@@ -1143,9 +1184,9 @@
         # Learn constrained network
         model = BayesianNetwork.from_samples(X, algorithm='greedy', 
             constraint_graph=cg, exclude_edges=[(1, d-1), (d-1, d-2)])    
-        assert_not_equal(model.structure[-1], (1,))
-        assert_not_equal(model.structure[-2], (d-1,))
-        assert_equal(model.structure[-2], (1,))
+        assert model.structure[-1] != (1,)
+        assert model.structure[-2] != (d-1,)
+        assert model.structure[-2] == (1,)
 
 def test_low_memory_constrained_sl_structure_learning_exclude_edges():
     for X in datasets:
@@ -1163,9 +1204,9 @@
         model = BayesianNetwork.from_samples(X, algorithm='greedy', 
             low_memory=True, constraint_graph=cg, 
             exclude_edges=[(1, d-1), (d-1, d-2)])    
-        assert_not_equal(model.structure[-1], (1,))
-        assert_not_equal(model.structure[-2], (d-1,))
-        assert_equal(model.structure[-2], (1,))
+        assert model.structure[-1] != (1,)
+        assert model.structure[-2] != (d-1,)
+        assert model.structure[-2] == (1,)
 
 def test_constrained_parents_structure_learning_exclude_edges():
     for X in datasets:
@@ -1187,13 +1228,13 @@
         # Learn constrained network
         model1 = BayesianNetwork.from_samples(X, algorithm='exact', 
             constraint_graph=cg, exclude_edges=[(1, d-1)])
-        assert_not_equal(model1.structure[-1], (1,))
-        assert_equal(model1.structure[-2], (1,))
+        assert model1.structure[-1] != (1,)
+        assert model1.structure[-2] == (1,)
 
         model2 = BayesianNetwork.from_samples(X, algorithm='exact',
             constraint_graph=cg)
-        assert_equal(model2.structure[-1], (1,))
-        assert_equal(model2.structure[-2], (1,))
+        assert model2.structure[-1] == (1,)
+        assert model2.structure[-2] == (1,)
 
     X = numpy.random.randint(2, size=(50, 8))
     X[:,0] = X[:,4]
@@ -1207,13 +1248,13 @@
 
     model = BayesianNetwork.from_samples(X, algorithm='exact', 
         constraint_graph=cg, exclude_edges=[(0, 4), (2, 7)])
-    assert_not_equal(model.structure[7], (2,))
-    assert_not_equal(model.structure[4], (0,))
+    assert model.structure[7] != (2,)
+    assert model.structure[4] != (0,)
 
     model = BayesianNetwork.from_samples(X, algorithm='exact',
         constraint_graph=cg)
-    assert_equal(model.structure[7], (2,))
-    assert_equal(model.structure[4], (0,))
+    assert model.structure[7] == (2,)
+    assert model.structure[4] == (0,)
 
 def test_low_memory_constrained_parents_structure_learning_exclude_edges():
     for X in datasets:
@@ -1235,13 +1276,13 @@
         # Learn constrained network
         model1 = BayesianNetwork.from_samples(X, algorithm='exact', 
             low_memory=True, constraint_graph=cg, exclude_edges=[(1, d-1)])
-        assert_not_equal(model1.structure[-1], (1,))
-        assert_equal(model1.structure[-2], (1,))
+        assert model1.structure[-1] != (1,)
+        assert model1.structure[-2] == (1,)
 
         model2 = BayesianNetwork.from_samples(X, algorithm='exact',
             low_memory=True, constraint_graph=cg)
-        assert_equal(model2.structure[-1], (1,))
-        assert_equal(model2.structure[-2], (1,))
+        assert model2.structure[-1] == (1,)
+        assert model2.structure[-2] == (1,)
 
     X = numpy.random.randint(2, size=(50, 8))
     X[:,0] = X[:,4]
@@ -1255,13 +1296,13 @@
 
     model = BayesianNetwork.from_samples(X, algorithm='exact', 
         low_memory=True, constraint_graph=cg, exclude_edges=[(0, 4), (2, 7)])
-    assert_not_equal(model.structure[7], (2,))
-    assert_not_equal(model.structure[4], (0,))
+    assert model.structure[7] != (2,)
+    assert model.structure[4] != (0,)
 
     model = BayesianNetwork.from_samples(X, algorithm='exact',
         low_memory=True, constraint_graph=cg)
-    assert_equal(model.structure[7], (2,))
-    assert_equal(model.structure[4], (0,))
+    assert model.structure[7] == (2,)
+    assert model.structure[4] == (0,)
 
 def test_constrained_slap_structure_learning_exclude_edges():
     for X in datasets:
@@ -1284,8 +1325,8 @@
         # Learn constrained network
         model1 = BayesianNetwork.from_samples(X, algorithm='exact', 
             constraint_graph=cg, exclude_edges=[(1, d-1)])
-        assert_not_equal(model1.structure[-1], (1,))
-        assert_equal(model1.structure[-1], (d-2,))
+        assert model1.structure[-1] != (1,)
+        assert model1.structure[-1] == (d-2,)
 
         #model2 = BayesianNetwork.from_samples(X, algorithm='exact',
         #    constraint_graph=cg)
@@ -1304,13 +1345,13 @@
 
     model = BayesianNetwork.from_samples(X, algorithm='exact', 
         constraint_graph=cg, exclude_edges=[(0, 4), (2, 7)])
-    assert_not_equal(model.structure[7], (2,))
-    assert_not_equal(model.structure[4], (0,))
+    assert model.structure[7] != (2,)
+    assert model.structure[4] != (0,)
 
     model = BayesianNetwork.from_samples(X, algorithm='exact',
         constraint_graph=cg)
-    assert_equal(model.structure[7], (2,))
-    assert_equal(model.structure[4], (0,))
+    assert model.structure[7] == (2,)
+    assert model.structure[4] == (0,)
 
 def test_constrained_parents_structure_learning():
     logps1 = [-12.2173, -207.3633, -3462.7469, -480.0970]
@@ -1339,15 +1380,15 @@
         
         # Check structure constraints satisfied
         for node in g1:
-            assert_equal(0, len(model1.structure[node]))
+            assert 0 == len(model1.structure[node])
         
-        assert_equal(model1.structure[-1], (1,))
-        assert_equal(model1.structure[-2], (1,))
+        assert model1.structure[-1] == (1,)
+        assert model1.structure[-2] == (1,)
 
         model2 = BayesianNetwork.from_samples(X, algorithm='exact')
         assert_almost_equal(model2.log_probability(X).sum(), logp2, 4)
-        assert_equal(model2.structure[-1], (d-2,))
-        assert_equal(model2.structure[-2], (1,))
+        assert model2.structure[-1] == (d-2,)
+        assert model2.structure[-2] == (1,)
 
 def test_constrained_slap_structure_learning():
     logps = [-21.7780, -345.9527, -4847.5969, -611.0356]
@@ -1371,42 +1412,42 @@
         
         # Check structure constraints satisfied
         for node in g1:
-            assert_equal(0, len(model.structure[node]))
+            assert 0 == len(model.structure[node])
 
 def test_from_structure():
     X = datasets[1]
     structure = ((1, 2), (4,), (), (), (3,))
     model = BayesianNetwork.from_structure(X, structure=structure)
 
-    assert_equal(model.structure, structure)
+    assert model.structure == structure
     assert_almost_equal(model.log_probability(X).sum(), -344.38287, 4)
 
     model2 = BayesianNetwork.from_json(model.to_json())
-    assert_equal(model2.structure, structure)
+    assert model2.structure == structure
     assert_almost_equal(model.log_probability(X).sum(), -344.38287, 4)
 
     model_dtype = type(model.states[0].distribution.parameters[0][0][0])
     model2_dtype = type(model2.states[0].distribution.parameters[0][0][0])
-    assert_equal(model_dtype, model2_dtype)
+    assert model_dtype == model2_dtype
 
 def test_robust_from_structure():
     X = datasets[1]
     structure = ((1, 2), (4,), (), (), (3,))
     model = BayesianNetwork.from_structure(X, structure=structure)
 
-    assert_equal(model.structure, structure)
+    assert model.structure == structure
     assert_almost_equal(model.log_probability(X).sum(), -344.38287, 4)
 
     model2 = from_json(model.to_json())
-    assert_equal(model2.structure, structure)
+    assert model2.structure == structure
     assert_almost_equal(model.log_probability(X).sum(), -344.38287, 4)
 
     model_dtype = type(model.states[0].distribution.parameters[0][0][0])
     model2_dtype = type(model2.states[0].distribution.parameters[0][0][0])
-    assert_equal(model_dtype, model2_dtype)
+    assert model_dtype == model2_dtype
 
-@with_setup(setup_random_mixed)
-def test_from_json():
+def test_from_json(random_mixed):
+    X, weights, data_generator, model = random_mixed
     model2 = BayesianNetwork.from_json(model.to_json())
 
     logp1 = model.log_probability(X)
@@ -1427,10 +1468,10 @@
 
     model_dtype = type(list(model.states[0].distribution.parameters[0].keys())[0])
     model2_dtype = type(list(model2.states[0].distribution.parameters[0].keys())[0])
-    assert_equal(model_dtype, model2_dtype)
+    assert model_dtype == model2_dtype
 
-@with_setup(setup_random_mixed)
-def test_robust_from_json():
+def test_robust_from_json(random_mixed):
+    X, weights, data_generator, model = random_mixed
     model2 = from_json(model.to_json())
 
     logp1 = model.log_probability(X)
@@ -1451,46 +1492,46 @@
 
     model_dtype = type(list(model.states[0].distribution.parameters[0].keys())[0])
     model2_dtype = type(list(model2.states[0].distribution.parameters[0].keys())[0])
-    assert_equal(model_dtype, model2_dtype)
+    assert model_dtype == model2_dtype
 
 def test_float64_from_json():
     X = datasets[1].astype('float64')
     structure = ((1, 2), (4,), (), (), (3,))
     model = BayesianNetwork.from_structure(X, structure=structure)
 
-    assert_equal(model.structure, structure)
+    assert model.structure == structure
     assert_almost_equal(model.log_probability(X).sum(), -344.38287, 4)
 
     model2 = BayesianNetwork.from_json(model.to_json())
-    assert_equal(model2.structure, structure)
+    assert model2.structure == structure
     assert_almost_equal(model.log_probability(X).sum(), -344.38287, 4)
 
     model_dtype = type(model.states[0].distribution.parameters[0][0][0])
     model2_dtype = type(model2.states[0].distribution.parameters[0][0][0])
-    assert_equal(model_dtype, model2_dtype)
+    assert model_dtype == model2_dtype
 
 def test_robust_float64_from_json():
     X = datasets[1].astype('float64')
     structure = ((1, 2), (4,), (), (), (3,))
     model = BayesianNetwork.from_structure(X, structure=structure)
 
-    assert_equal(model.structure, structure)
+    assert model.structure == structure
     assert_almost_equal(model.log_probability(X).sum(), -344.38287, 4)
 
     model2 = from_json(model.to_json())
-    assert_equal(model2.structure, structure)
+    assert model2.structure == structure
     assert_almost_equal(model.log_probability(X).sum(), -344.38287, 4)
 
     model_dtype = type(model.states[0].distribution.parameters[0][0][0])
     model2_dtype = type(model2.states[0].distribution.parameters[0][0][0])
-    assert_equal(model_dtype, model2_dtype)
+    assert model_dtype == model2_dtype
 
 def test_parallel_structure_learning():
     logps = -19.8282, -345.9527, -4847.59688, -604.0190
     for X, logp in zip(datasets, logps):
         model = BayesianNetwork.from_samples(X, algorithm='exact')
         model2 = BayesianNetwork.from_samples(X, algorithm='exact', n_jobs=2)
-        assert_equal(model.log_probability(X).sum(), model2.log_probability(X).sum())
+        assert model.log_probability(X).sum() == model2.log_probability(X).sum()
         assert_almost_equal(model.log_probability(X).sum(), logp, 4)
 
 
@@ -1504,10 +1545,10 @@
     for X in datasets:
         model = BayesianNetwork.from_samples(X, algorithm='greedy', 
             include_edges=[(1, 3)])
-        assert_equal(model.structure[3], (1,))
+        assert model.structure[3] == (1,)
 
         model = BayesianNetwork.from_samples(X, algorithm='greedy')
-        assert_not_equal(model.structure[3], (1,))
+        assert model.structure[3] != (1,)
 
 def test_greedy_structure_learning_exclude_edges():
     for X in datasets:
@@ -1520,24 +1561,24 @@
         # Learn constrained network
         model = BayesianNetwork.from_samples(X, algorithm='greedy', 
             exclude_edges=[(1, d-1), (d-1, d-2)])    
-        assert_not_equal(model.structure[-1], (1,))
-        assert_not_equal(model.structure[-2], (d-1,))
-        assert_equal(model.structure[-2], (1,))
+        assert model.structure[-1] != (1,)
+        assert model.structure[-2] != (d-1,)
+        assert model.structure[-2] == (1,)
 
 def test_greedy_penalized_structure_learning():
     n_parents = [(5, 3, 4), (10, 0, 1), (21, 0, 5), (26, 1, 21)]
     for X, n_parents in zip(datasets, n_parents):
         model = BayesianNetwork.from_samples(X, algorithm='greedy', penalty=0)
-        assert_equal(sum(map(len, model.structure)), n_parents[0])
+        assert sum(map(len, model.structure)) == n_parents[0]
 
         model = BayesianNetwork.from_samples(X, algorithm='greedy')
-        assert_equal(sum(map(len, model.structure)), n_parents[1])
+        assert sum(map(len, model.structure)) == n_parents[1]
 
         model = BayesianNetwork.from_samples(X, algorithm='greedy', penalty=1)
-        assert_equal(sum(map(len, model.structure)), n_parents[2])
+        assert sum(map(len, model.structure)) == n_parents[2]
 
         model = BayesianNetwork.from_samples(X, algorithm='greedy', penalty=100)
-        assert_equal(sum(map(len, model.structure)), 0)
+        assert sum(map(len, model.structure)) == 0
 
 def test_chow_liu_structure_learning():
     logps = -19.8282, -344.248785, -4842.40158, -603.2370
@@ -1552,7 +1593,7 @@
         model = BayesianNetwork.from_samples(X, algorithm='exact')
         model2 = BayesianNetwork.from_samples(X, algorithm='exact-dp')
 
-        assert_equal(model.log_probability(X).sum(), model2.log_probability(X).sum())
+        assert model.log_probability(X).sum() == model2.log_probability(X).sum()
         assert_almost_equal(model.log_probability(X).sum(), logp, 4)
 
 
@@ -1562,8 +1603,8 @@
         model = BayesianNetwork.from_samples(X, algorithm='greedy')
         assert_almost_equal(model.log_probability(X).sum(), logp, 4)
 
-@with_setup(setup_random_mixed, teardown)
-def test_io_log_probability():
+def test_io_log_probability(random_mixed):
+    X, weights, data_generator, model = random_mixed
     X2 = DataGenerator(X)
     X3 = DataFrameGenerator(pandas.DataFrame(X))
 
@@ -1574,8 +1615,8 @@
     assert_array_almost_equal(logp1, logp2)
     assert_array_almost_equal(logp1, logp3)
 
-@with_setup(setup_random_mixed, teardown)
-def test_io_predict():
+def test_io_predict(random_mixed):
+    X, weights, data_generator, model = random_mixed
     X2 = DataGenerator(X)
     X3 = DataFrameGenerator(pandas.DataFrame(X))
 
@@ -1586,8 +1627,8 @@
     assert_array_equal(y_hat1, y_hat2)
     assert_array_equal(y_hat1, y_hat3)
 
-@with_setup(setup_random_mixed, teardown)
-def test_io_fit():
+def test_io_fit(random_mixed):
+    X, weights, data_generator, model = random_mixed
     d1 = DiscreteDistribution({True: 0.6, False: 0.4})
     d2 = ConditionalProbabilityTable([
         [True, 'A', 0.2],
@@ -1639,8 +1680,8 @@
 
     assert_array_almost_equal(logp1, logp2)
 
-@with_setup(setup_random_mixed, teardown)
-def test_io_from_samples():
+def test_io_from_samples(random_mixed):
+    X, weights, data_generator, model = random_mixed
     model1 = BayesianNetwork.from_samples(X, weights=weights)
     model2 = BayesianNetwork.from_samples(data_generator)
 
@@ -1649,8 +1690,8 @@
 
     assert_array_almost_equal(logp1, logp2)
 
-@with_setup(setup_random_mixed, teardown)
-def test_io_from_structure():
+def test_io_from_structure(random_mixed):
+    X, weights, data_generator, model = random_mixed
     structure = ((2,), (0, 2), ())
 
     model1 = BayesianNetwork.from_structure(X=X, weights=weights,
--- python-pomegranate.orig/tests/test_custom_distributions.py
+++ python-pomegranate/tests/test_custom_distributions.py
@@ -1,20 +1,13 @@
 from __future__ import (division)
 
 from pomegranate import *
-from .tools import with_setup
-from .tools import assert_almost_equal
-from .tools import assert_equal
-from .tools import assert_not_equal
-from .tools import assert_less_equal
-from .tools import assert_raises
-from .tools import assert_true
 from numpy.testing import assert_array_equal
 from numpy.testing import assert_array_almost_equal
 
 import numpy
 import scipy.stats
 
-nan = numpy.nan
+import pytest
 
 class NormalDistribution2():
 	def __init__(self, mu, std):
@@ -116,10 +109,9 @@
 	model.bake()
 	return model
 
-def setup_normal_hmm():
-	global model1
-	global model2
 
+@pytest.fixture
+def normal_hmm():
 	d1 = NormalDistribution(0, 1)
 	d2 = NormalDistribution(1, 1)
 	model1 = build_model(d1, d2)
@@ -127,24 +119,21 @@
 	d3 = NormalDistribution2(0, 1)
 	d4 = NormalDistribution2(1, 1)
 	model2 = build_model(d3, d4)
+	return model1, model2
 
-def setup_mgd_hmm():
-	global model1
-	global model2
-
+@pytest.fixture
+def mgd_hmm():
 	d1 = MultivariateGaussianDistribution(numpy.zeros(3), numpy.eye(3))
 	d2 = MultivariateGaussianDistribution(numpy.ones(3), numpy.eye(3))
 	model1 = build_model(d1, d2)
-	
 
 	d3 = MultivariateGaussianDistribution2(numpy.zeros(3), numpy.eye(3))
 	d4 = MultivariateGaussianDistribution2(numpy.ones(3), numpy.eye(3))
 	model2 = build_model(d3, d4)
+	return model1, model2
 
-def setup_icd_hmm():
-	global model1
-	global model2
-
+@pytest.fixture
+def icd_hmm():
 	d1 = IndependentComponentsDistribution([NormalDistribution(0, 1) for _ in range(5)])
 	d2 = IndependentComponentsDistribution([NormalDistribution(1, 1) for _ in range(5)])
 	model1 = build_model(d1, d2)
@@ -152,15 +141,16 @@
 	d3 = IndependentComponentsDistribution([NormalDistribution2(0, 1) for _ in range(5)])
 	d4 = IndependentComponentsDistribution([NormalDistribution2(1, 1) for _ in range(5)])
 	model2 = build_model(d3, d4)
+	return model1, model2
 
 def test_custom_normal_gmm_init():
 	d1 = NormalDistribution2(0, 1)
 	d2 = NormalDistribution2(1, 1)
 	model = GeneralMixtureModel([d1, d2])
 
-	assert_equal(d1.d, 1)
-	assert_equal(d2.d, 1)
-	assert_equal(model.d, 1)
+	assert d1.d == 1
+	assert d2.d == 1
+	assert model.d == 1
 
 def test_custom_normal_gmm_logp():
 	X = numpy.random.normal(0.5, 1, size=(20, 1))
@@ -247,9 +237,9 @@
 	d2 = MultivariateGaussianDistribution2(numpy.ones(3), numpy.eye(3))
 	model = GeneralMixtureModel([d1, d2])
 
-	assert_equal(d1.d, 3)
-	assert_equal(d2.d, 3)
-	assert_equal(model.d, 3)
+	assert d1.d == 3
+	assert d2.d == 3
+	assert model.d == 3
 
 def test_custom_mgd_gmm_logp():
 	X = numpy.random.normal(0.5, 1, size=(20,3))
@@ -337,9 +327,9 @@
 	d2 = IndependentComponentsDistribution([NormalDistribution2(1, 1) for _ in range(5)])
 	model = GeneralMixtureModel([d1, d2])
 
-	assert_equal(d1.d, 5)
-	assert_equal(d2.d, 5)
-	assert_equal(model.d, 5)
+	assert d1.d == 5
+	assert d2.d == 5
+	assert model.d == 5
 
 def test_custom_icd_gmm_logp():
 	X = numpy.random.normal(0.5, 1, size=(20,5))
@@ -426,9 +416,9 @@
 	d2 = NormalDistribution2(1, 1)
 	model = NaiveBayes([d1, d2])
 
-	assert_equal(d1.d, 1)
-	assert_equal(d2.d, 1)
-	assert_equal(model.d, 1)
+	assert d1.d == 1
+	assert d2.d == 1
+	assert model.d == 1
 
 def test_custom_normal_nb_logp():
 	X = numpy.random.normal(0.5, 1, size=(20, 1))
@@ -519,9 +509,9 @@
 	d2 = MultivariateGaussianDistribution2(numpy.ones(3), numpy.eye(3))
 	model = BayesClassifier([d1, d2])
 
-	assert_equal(d1.d, 3)
-	assert_equal(d2.d, 3)
-	assert_equal(model.d, 3)
+	assert d1.d == 3
+	assert d2.d == 3
+	assert model.d == 3
 
 def test_custom_mgd_bc_logp():
 	X = numpy.random.normal(0.5, 1, size=(20,3))
@@ -612,9 +602,9 @@
 	d2 = IndependentComponentsDistribution([NormalDistribution2(1, 1) for _ in range(5)])
 	model = NaiveBayes([d1, d2])
 
-	assert_equal(d1.d, 5)
-	assert_equal(d2.d, 5)
-	assert_equal(model.d, 5)
+	assert d1.d == 5
+	assert d2.d == 5
+	assert model.d == 5
 
 def test_custom_icd_nb_logp():
 	X = numpy.random.normal(0.5, 1, size=(20,5))
@@ -700,33 +690,33 @@
 
 	assert_array_almost_equal(model1.log_probability(X), model2.log_probability(X))
 
-@with_setup(setup_normal_hmm)
-def test_custom_normal_hmm_init():
-	assert_equal(model1.d, 1)
-	assert_equal(model2.d, 1)
+def test_custom_normal_hmm_init(normal_hmm):
+	model1, model2 = normal_hmm
+	assert model1.d == 1
+	assert model2.d == 1
 
-@with_setup(setup_normal_hmm)
-def test_custom_normal_hmm_logp():
+def test_custom_normal_hmm_logp(normal_hmm):
+	model1, model2 = normal_hmm
 	X = numpy.random.normal(0.5, 1, size=(5, 20, 1))
 	assert_array_almost_equal(model1.log_probability(X), model2.log_probability(X))
 
-@with_setup(setup_normal_hmm)
-def test_custom_normal_hmm_predict():
+def test_custom_normal_hmm_predict(normal_hmm):
+	model1, model2 = normal_hmm
 	X = numpy.random.normal(0.5, 1, size=(5, 20, 1))
 	assert_array_equal(model1.predict(X), model2.predict(X))
 
-@with_setup(setup_normal_hmm)
-def test_custom_normal_hmm_predict_proba():
+def test_custom_normal_hmm_predict_proba(normal_hmm):
+	model1, model2 = normal_hmm
 	X = numpy.random.normal(0.5, 1, size=(5, 20, 1))
 	assert_array_almost_equal(model1.predict_proba(X), model2.predict_proba(X))
 
-@with_setup(setup_normal_hmm)
-def test_custom_normal_hmm_predict_log_proba():
+def test_custom_normal_hmm_predict_log_proba(normal_hmm):
+	model1, model2 = normal_hmm
 	X = numpy.random.normal(0.5, 1, size=(5, 20, 1))
 	assert_array_almost_equal(model1.predict_log_proba(X), model2.predict_log_proba(X))
 
-@with_setup(setup_normal_hmm)
-def test_custom_normal_hmm_fit():
+def test_custom_normal_hmm_fit(normal_hmm):
+	model1, model2 = normal_hmm
 	X = numpy.random.normal(0.5, 1, size=(5, 20, 1))
 
 	model1.fit(X, max_iterations=5)
@@ -743,33 +733,33 @@
 
 	assert_array_almost_equal(model1.log_probability(X), model2.log_probability(X))
 
-@with_setup(setup_mgd_hmm)
-def test_custom_mgd_hmm_init():
-	assert_equal(model1.d, 3)
-	assert_equal(model2.d, 3)
+def test_custom_mgd_hmm_init(mgd_hmm):
+	model1, model2 = mgd_hmm
+	assert model1.d == 3
+	assert model2.d == 3
 
-@with_setup(setup_mgd_hmm)
-def test_custom_mgd_hmm_logp():
+def test_custom_mgd_hmm_logp(mgd_hmm):
+	model1, model2 = mgd_hmm
 	X = numpy.random.normal(0.5, 1, size=(5, 20, 3))
 	assert_array_almost_equal(model1.log_probability(X), model2.log_probability(X))
 
-@with_setup(setup_mgd_hmm)
-def test_custom_mgd_hmm_predict():
+def test_custom_mgd_hmm_predict(mgd_hmm):
+	model1, model2 = mgd_hmm
 	X = numpy.random.normal(0.5, 1, size=(5, 20, 3))
 	assert_array_equal(model1.predict(X), model2.predict(X))
 
-@with_setup(setup_mgd_hmm)
-def test_custom_mgd_hmm_predict_proba():
+def test_custom_mgd_hmm_predict_proba(mgd_hmm):
+	model1, model2 = mgd_hmm
 	X = numpy.random.normal(0.5, 1, size=(5, 20, 3))
 	assert_array_almost_equal(model1.predict_proba(X), model2.predict_proba(X))
 
-@with_setup(setup_mgd_hmm)
-def test_custom_mgd_hmm_predict_log_proba():
+def test_custom_mgd_hmm_predict_log_proba(mgd_hmm):
+	model1, model2 = mgd_hmm
 	X = numpy.random.normal(0.5, 1, size=(5, 20, 3))
 	assert_array_almost_equal(model1.predict_log_proba(X), model2.predict_log_proba(X))
 
-@with_setup(setup_mgd_hmm)
-def test_custom_mgd_hmm_fit():
+def test_custom_mgd_hmm_fit(mgd_hmm):
+	model1, model2 = mgd_hmm
 	X = numpy.random.normal(0, 0.1, size=(2, 50, 3))
 	X[:, ::2] += 1
 
@@ -787,33 +777,33 @@
 
 	assert_array_almost_equal(model1.log_probability(X), model2.log_probability(X))
 
-@with_setup(setup_icd_hmm)
-def test_custom_icd_hmm_init():
-	assert_equal(model1.d, 5)
-	assert_equal(model2.d, 5)
+def test_custom_icd_hmm_init(icd_hmm):
+	model1, model2 = icd_hmm
+	assert model1.d == 5
+	assert model2.d == 5
 
-@with_setup(setup_icd_hmm)
-def test_custom_icd_hmm_logp():
+def test_custom_icd_hmm_logp(icd_hmm):
+	model1, model2 = icd_hmm
 	X = numpy.random.normal(0.5, 1, size=(5, 20, 5))
 	assert_array_almost_equal(model1.log_probability(X), model2.log_probability(X))
 
-@with_setup(setup_icd_hmm)
-def test_custom_icd_hmm_predict():
+def test_custom_icd_hmm_predict(icd_hmm):
+	model1, model2 = icd_hmm
 	X = numpy.random.normal(0.5, 1, size=(5, 20, 5))
 	assert_array_almost_equal(model1.predict(X), model2.predict(X))
 
-@with_setup(setup_icd_hmm)
-def test_custom_icd_hmm_predict_proba():
+def test_custom_icd_hmm_predict_proba(icd_hmm):
+	model1, model2 = icd_hmm
 	X = numpy.random.normal(0.5, 1, size=(5, 20, 5))
 	assert_array_almost_equal(model1.predict_proba(X), model2.predict_proba(X))
 
-@with_setup(setup_icd_hmm)
-def test_custom_icd_hmm_predict_log_proba():
+def test_custom_icd_hmm_predict_log_proba(icd_hmm):
+	model1, model2 = icd_hmm
 	X = numpy.random.normal(0.5, 1, size=(5, 20, 5))
 	assert_array_almost_equal(model1.predict_log_proba(X), model2.predict_log_proba(X))
 
-@with_setup(setup_icd_hmm)
-def test_custom_icd_hmm_fit():
+def test_custom_icd_hmm_fit(icd_hmm):
+	model1, model2 = icd_hmm
 	X = numpy.random.normal(0.5, 1, size=(3, 20, 5))
 
 	model1.fit(X, max_iterations=5)
--- python-pomegranate.orig/tests/test_distributions.py
+++ python-pomegranate/tests/test_distributions.py
@@ -18,28 +18,16 @@
 						 BernoulliDistribution,
 						 from_json)
 
-from .tools import with_setup
-from .tools import assert_almost_equal
-from .tools import assert_equal
-from .tools import assert_not_equal
-from .tools import assert_less_equal
-from .tools import assert_true
-from .tools import assert_raises
+from .assert_tools import assert_almost_equal
 from numpy.testing import assert_array_equal
 from numpy.testing import assert_array_almost_equal
 import pickle
 import numpy
+import pytest
 
 nan = numpy.nan
 inf = float("inf")
 
-def setup():
-	pass
-
-
-def teardown():
-	pass
-
 
 def discrete_equality(x, y, z=8):
 	'''
@@ -56,22 +44,25 @@
 
 def test_distributions_uniform_initialization():
 	d = UniformDistribution(0, 10)
-	assert_equal(d.name, "UniformDistribution")
+	assert d.name == "UniformDistribution"
 	assert_array_equal(d.parameters, [0, 10])
 	assert_array_equal(d.summaries, [inf, -inf, 0])
 
 
 def test_distributions_uniform_blank():
 	d = UniformDistribution.blank()
-	assert_equal(d.name, "UniformDistribution")
+	assert d.name == "UniformDistribution"
 	assert_array_equal(d.parameters, [0, 0])
 	assert_array_equal(d.summaries, [inf, -inf, 0])
 
 
 def test_distributions_uniform_initialization_error():
-	assert_raises(TypeError, UniformDistribution, 0)
-	assert_raises(TypeError, UniformDistribution, [0, 10])
-	assert_raises(TypeError, UniformDistribution, 0, 10, 4, 7, 3)
+	with pytest.raises(TypeError):
+		UniformDistribution(0)
+	with pytest.raises(TypeError):
+		UniformDistribution([0, 10])
+	with pytest.raises(TypeError):
+		UniformDistribution(0, 10, 4, 7, 3)
 
 
 def test_distributions_uniform_log_probability():
@@ -79,26 +70,26 @@
 	e = UniformDistribution(0., 10.)
 
 	assert_almost_equal(d.log_probability(5), -2.302585092)
-	assert_equal(d.log_probability(5), e.log_probability(5))
-	assert_equal(d.log_probability(5), d.log_probability(5.))
+	assert d.log_probability(5) == e.log_probability(5)
+	assert d.log_probability(5) == d.log_probability(5.)
 
 	assert_almost_equal(d.log_probability(0), -2.302585092)
-	assert_equal(d.log_probability(0), e.log_probability(0.))
+	assert d.log_probability(0) == e.log_probability(0.)
 
-	assert_equal(d.log_probability(-1), -inf)
-	assert_equal(d.log_probability(11), -inf)
+	assert d.log_probability(-1) == -inf
+	assert d.log_probability(11) == -inf
 
 
 def test_distributions_uniform_nan_log_probability():
 	d = UniformDistribution(0, 10)
 
-	assert_equal(d.log_probability(nan), 0)
+	assert d.log_probability(nan) == 0
 	assert_array_almost_equal(d.log_probability([nan, 5]), [0, -2.302585092])
 
 
 def test_distributions_uniform_underflow_log_probability():
 	d = UniformDistribution(0, 10)
-	assert_equal(d.log_probability(1e100), float("-inf"))
+	assert d.log_probability(1e100) == float("-inf")
 
 
 def test_distributions_uniform_probability():
@@ -106,20 +97,20 @@
 	e = UniformDistribution(0., 10.)
 
 	assert_almost_equal(d.probability(5), 0.0999999999)
-	assert_equal(d.probability(5), e.probability(5))
-	assert_equal(d.probability(5), d.probability(5.))
+	assert d.probability(5) == e.probability(5)
+	assert d.probability(5) == d.probability(5.)
 
 	assert_almost_equal(d.probability(0), 0.0999999999)
-	assert_equal(d.probability(0), e.probability(0.))
+	assert d.probability(0) == e.probability(0.)
 
-	assert_equal(d.probability(-1), 0)
-	assert_equal(d.probability(11), 0)
+	assert d.probability(-1) == 0
+	assert d.probability(11) == 0
 
 
 def test_distributions_uniform_nan_probability():
 	d = UniformDistribution(0, 10)
 
-	assert_equal(d.probability(nan), 1)
+	assert d.probability(nan) == 1
 	assert_array_almost_equal(d.probability([nan, 5]), [1, 0.0999999999])
 
 
@@ -135,10 +126,10 @@
 	d.fit([5, 4, 5, 4, 6, 5, 6, 5, 4, 6, 5, 4])
 
 	assert_array_equal(d.parameters, [4, 6])
-	assert_not_equal(d.log_probability(4), e.log_probability(4))
+	assert d.log_probability(4) != e.log_probability(4)
 	assert_almost_equal(d.log_probability(4), -0.69314718055994529)
-	assert_equal(d.log_probability(18), -inf)
-	assert_equal(d.log_probability(1e8), -inf)
+	assert d.log_probability(18) == -inf
+	assert d.log_probability(1e8) == -inf
 	assert_array_equal(d.summaries, [inf, -inf, 0])
 
 
@@ -149,10 +140,10 @@
 	d.fit([5, 4, nan, 5, 4, nan, 6, 5, 6, nan, nan, 5, 4, 6, nan, 5, 4, nan])
 
 	assert_array_equal(d.parameters, [4, 6])
-	assert_not_equal(d.log_probability(4), e.log_probability(4))
+	assert d.log_probability(4) != e.log_probability(4)
 	assert_almost_equal(d.log_probability(4), -0.69314718055994529)
-	assert_equal(d.log_probability(18), -inf)
-	assert_equal(d.log_probability(1e8), -inf)
+	assert d.log_probability(18) == -inf
+	assert d.log_probability(1e8) == -inf
 	assert_array_equal(d.summaries, [inf, -inf, 0])
 
 
@@ -234,7 +225,7 @@
 	d = UniformDistribution(0, 10)
 
 	e = pickle.loads(pickle.dumps(d))
-	assert_equal(e.name, "UniformDistribution")
+	assert e.name == "UniformDistribution"
 	assert_array_equal(e.parameters, [0, 10])
 	assert_array_equal(d.summaries, [inf, -inf, 0])
 
@@ -243,7 +234,7 @@
 	d = UniformDistribution(0, 10)
 
 	e = Distribution.from_json(d.to_json())
-	assert_equal(e.name, "UniformDistribution")
+	assert e.name == "UniformDistribution"
 	assert_array_equal(e.parameters, [0, 10])
 	assert_array_equal(d.summaries, [inf, -inf, 0])
 
@@ -251,7 +242,7 @@
 	d = UniformDistribution(0, 10)
 
 	e = from_json(d.to_json())
-	assert_equal(e.name, "UniformDistribution")
+	assert e.name == "UniformDistribution"
 	assert_array_equal(e.parameters, [0, 10])
 	assert_array_equal(d.summaries, [inf, -inf, 0])
 
@@ -262,26 +253,30 @@
 		4.88411189])
 
 	assert_array_almost_equal(d.sample(5, random_state=5), x)
-	assert_raises(AssertionError, assert_array_almost_equal, d.sample(5), x)
+	with pytest.raises(AssertionError):
+		assert_array_almost_equal(d.sample(5), x)
 
 def test_distributions_normal_initialization():
 	d = NormalDistribution(5, 2)
-	assert_equal(d.name, "NormalDistribution")
+	assert d.name == "NormalDistribution"
 	assert_array_equal(d.parameters, [5, 2])
 	assert_array_equal(d.summaries, [0, 0, 0])
 
 
 def test_distributions_normal_blank():
 	d = NormalDistribution.blank()
-	assert_equal(d.name, "NormalDistribution")
+	assert d.name == "NormalDistribution"
 	assert_array_equal(d.parameters, [0, 1])
 	assert_array_equal(d.summaries, [0, 0, 0])
 
 
 def test_distributions_normal_initialization_error():
-	assert_raises(TypeError, NormalDistribution, 5)
-	assert_raises(TypeError, NormalDistribution, [5, 1])
-	assert_raises(TypeError, NormalDistribution, 5, 1, 4, 7, 3)
+	with pytest.raises(TypeError):
+		NormalDistribution(5)
+	with pytest.raises(TypeError):
+		NormalDistribution([5, 1])
+	with pytest.raises(TypeError):
+		NormalDistribution(5, 1, 4, 7, 3)
 
 
 def test_distributions_normal_log_probability():
@@ -289,23 +284,23 @@
 	e = NormalDistribution(5., 2.)
 
 	assert_almost_equal(d.log_probability(5), -1.61208571)
-	assert_equal(d.log_probability(5), e.log_probability(5))
-	assert_equal(d.log_probability(5), d.log_probability(5.))
+	assert d.log_probability(5) == e.log_probability(5)
+	assert d.log_probability(5) == d.log_probability(5.)
 
 	assert_almost_equal(d.log_probability(0), -4.737085713764219)
-	assert_equal(d.log_probability(0), e.log_probability(0.))
+	assert d.log_probability(0) == e.log_probability(0.)
 
 
 def test_distributions_normal_nan_log_probability():
 	d = NormalDistribution(5, 2)
 
-	assert_equal(d.log_probability(nan), 0)
+	assert d.log_probability(nan) == 0
 	assert_array_almost_equal(d.log_probability([nan, 5]), [0, -1.61208571])
 
 
 def test_distributions_normal_underflow_log_probability():
 	d = NormalDistribution(5, 1e-10)
-	assert_almost_equal(d.log_probability(1e100), -4.9999999999999987e+219, delta=6.270570637641398e+203)
+	assert abs(d.log_probability(1e100) - (-4.9999999999999987e+219)) <= 6.270570637641398e+203
 
 
 def test_distributions_normal_probability():
@@ -313,17 +308,17 @@
 	e = NormalDistribution(5., 2.)
 
 	assert_almost_equal(d.probability(5), 0.19947114)
-	assert_equal(d.probability(5), e.probability(5))
-	assert_equal(d.probability(5), d.probability(5.))
+	assert d.probability(5) == e.probability(5)
+	assert d.probability(5) == d.probability(5.)
 
 	assert_almost_equal(d.probability(0), 0.0087641502)
-	assert_equal(d.probability(0), e.probability(0.))
+	assert d.probability(0) == e.probability(0.)
 
 
 def test_distributions_normal_nan_probability():
 	d = NormalDistribution(5, 2)
 
-	assert_equal(d.probability(nan), 1)
+	assert d.probability(nan) == 1
 	assert_array_almost_equal(d.probability([nan, 5]), [1, 0.199471])
 
 
@@ -339,7 +334,7 @@
 	d.fit([5, 4, 5, 4, 6, 5, 6, 5, 4, 6, 5, 4])
 
 	assert_array_almost_equal(d.parameters, [4.9167, 0.7592], 4)
-	assert_not_equal(d.log_probability(4), e.log_probability(4))
+	assert d.log_probability(4) != e.log_probability(4)
 	assert_almost_equal(d.log_probability(4), -1.3723678499651766)
 	assert_almost_equal(d.log_probability(18), -149.13140399454429)
 	assert_almost_equal(d.log_probability(1e8), -8674697942168743.0, -4)
@@ -353,7 +348,7 @@
 	d.fit([5, 4, nan, 5, 4, nan, 6, 5, 6, nan, nan, 5, 4, 6, nan, 5, 4, nan])
 
 	assert_array_almost_equal(d.parameters, [4.9167, 0.7592], 4)
-	assert_not_equal(d.log_probability(4), e.log_probability(4))
+	assert d.log_probability(4) != e.log_probability(4)
 	assert_almost_equal(d.log_probability(4), -1.3723678499651766)
 	assert_almost_equal(d.log_probability(18), -149.13140399454429)
 	assert_almost_equal(d.log_probability(1e8), -8674697942168743.0, -4)
@@ -438,7 +433,7 @@
 	d = NormalDistribution(5, 2)
 
 	e = pickle.loads(pickle.dumps(d))
-	assert_equal(e.name, "NormalDistribution")
+	assert e.name == "NormalDistribution"
 	assert_array_equal(e.parameters, [5, 2])
 	assert_array_equal(e.summaries, [0, 0, 0])
 
@@ -447,7 +442,7 @@
 	d = NormalDistribution(5, 2)
 
 	e = Distribution.from_json(d.to_json())
-	assert_equal(e.name, "NormalDistribution")
+	assert e.name == "NormalDistribution"
 	assert_array_equal(e.parameters, [5, 2])
 	assert_array_equal(e.summaries, [0, 0, 0])
 
@@ -455,166 +450,165 @@
 	d = NormalDistribution(5, 2)
 
 	e = from_json(d.to_json())
-	assert_equal(e.name, "NormalDistribution")
+	assert e.name == "NormalDistribution"
 	assert_array_equal(e.parameters, [5, 2])
 	assert_array_equal(e.summaries, [0, 0, 0])
 
 def test_distributions_normal_random_sample():
 	d = NormalDistribution(0, 1)
 
-	x = numpy.array([ 0.44122749, -0.33087015,  2.43077119, -0.25209213,  
+	x = numpy.array([ 0.44122749, -0.33087015,  2.43077119, -0.25209213,
 		0.10960984])
 
 	assert_array_almost_equal(d.sample(5, random_state=5), x)
-	assert_raises(AssertionError, assert_array_almost_equal, d.sample(5), x)
+	with pytest.raises(AssertionError):
+		assert_array_almost_equal(d.sample(5), x)
 
 
-@with_setup(setup, teardown)
 def test_distributions_discrete():
 	d = DiscreteDistribution({'A': 0.25, 'C': 0.25, 'G': 0.25, 'T': 0.25})
 
-	assert_equal(d.log_probability('C'), -1.3862943611198906)
-	assert_equal(d.log_probability('A'), d.log_probability('C'))
-	assert_equal(d.log_probability('G'), d.log_probability('T'))
-	assert_equal(d.log_probability('a'), float('-inf'))
+	assert d.log_probability('C') == -1.3862943611198906
+	assert d.log_probability('A') == d.log_probability('C')
+	assert d.log_probability('G') == d.log_probability('T')
+	assert d.log_probability('a') == float('-inf')
 
 	seq = "ACGTACGTTGCATGCACGCGCTCTCGCGC"
 	d.fit(list(seq))
 
-	assert_equal(d.log_probability('C'), -0.9694005571881036)
-	assert_equal(d.log_probability('A'), -1.9810014688665833)
-	assert_equal(d.log_probability('T'), -1.575536360758419)
+	assert d.log_probability('C') == -0.9694005571881036
+	assert d.log_probability('A') == -1.9810014688665833
+	assert d.log_probability('T') == -1.575536360758419
 
 	seq = "ACGTGTG"
 	d.fit(list(seq), weights=[0., 1., 2., 3., 4., 5., 6.])
 
-	assert_equal(d.log_probability('A'), float('-inf'))
-	assert_equal(d.log_probability('C'), -3.044522437723423)
-	assert_equal(d.log_probability('G'), -0.5596157879354228)
+	assert d.log_probability('A') == float('-inf')
+	assert d.log_probability('C') == -3.044522437723423
+	assert d.log_probability('G') == -0.5596157879354228
 
 	d.summarize(list("ACG"), weights=[0., 1., 2.])
 	d.summarize(list("TGT"), weights=[3., 4., 5.])
 	d.summarize(list("G"), weights=[6.])
 	d.from_summaries()
 
-	assert_equal(d.log_probability('A'), float('-inf'))
-	assert_equal(round(d.log_probability('C'), 4), -3.0445)
-	assert_equal(round(d.log_probability('G'), 4), -0.5596)
+	assert d.log_probability('A') == float('-inf')
+	assert round(d.log_probability('C'), 4) == -3.0445
+	assert round(d.log_probability('G'), 4) == -0.5596
 
 	d = DiscreteDistribution({'A': 0.0, 'B': 1.0})
 	d.summarize(list("ABABABAB"))
 	d.summarize(list("ABAB"))
 	d.summarize(list("BABABABABABABABABA"))
 	d.from_summaries(inertia=0.75)
-	assert_equal(d.parameters[0], {'A': 0.125, 'B': 0.875})
+	assert d.parameters[0] == {'A': 0.125, 'B': 0.875}
 
 	d = DiscreteDistribution({'A': 0.0, 'B': 1.0})
 	d.summarize(list("ABABABAB"))
 	d.summarize(list("ABAB"))
 	d.summarize(list("BABABABABABABABABA"))
 	d.from_summaries(inertia=0.5)
-	assert_equal(d.parameters[0], {'A': 0.25, 'B': 0.75})
+	assert d.parameters[0] == {'A': 0.25, 'B': 0.75}
 
 	d.freeze()
 	d.fit(list('ABAABBAAAAAAAAAAAAAAAAAA'))
-	assert_equal(d.parameters[0], {'A': 0.25, 'B': 0.75})
+	assert d.parameters[0] == {'A': 0.25, 'B': 0.75}
 
 	d = DiscreteDistribution.from_samples(['A', 'B', 'A', 'A'])
-	assert_equal(d.parameters[0], {'A': 0.75, 'B': 0.25})
+	assert d.parameters[0] == {'A': 0.75, 'B': 0.25}
 
 	# Test vector input instead of flat array.
 	d = DiscreteDistribution.from_samples(numpy.array(['A', 'B', 'A', 'A']).reshape(-1,1))
-	assert_equal(d.parameters[0], {'A': 0.75, 'B': 0.25})
+	assert d.parameters[0] == {'A': 0.75, 'B': 0.25}
 
 	d = DiscreteDistribution.from_samples(['A', 'B', 'A', 'A'], pseudocount=0.5)
-	assert_equal(d.parameters[0], {'A': 0.70, 'B': 0.30})
+	assert d.parameters[0] == {'A': 0.70, 'B': 0.30}
 
 	d = DiscreteDistribution.from_samples(['A', 'B', 'A', 'A'], pseudocount=6)
-	assert_equal(d.parameters[0], {'A': 0.5625, 'B': 0.4375})
+	assert d.parameters[0] == {'A': 0.5625, 'B': 0.4375}
 
 	e = Distribution.from_json(d.to_json())
-	assert_equal(e.name, "DiscreteDistribution")
-	assert_equal(e.parameters[0], {'A': 0.5625, 'B': 0.4375})
+	assert e.name == "DiscreteDistribution"
+	assert e.parameters[0] == {'A': 0.5625, 'B': 0.4375}
 
 	f = pickle.loads(pickle.dumps(e))
-	assert_equal(f.name, "DiscreteDistribution")
-	assert_equal(f.parameters[0], {'A': 0.5625, 'B': 0.4375})
+	assert f.name == "DiscreteDistribution"
+	assert f.parameters[0] == {'A': 0.5625, 'B': 0.4375}
 
 
 def test_discrete_robust_json_serialization():
 	d = DiscreteDistribution.from_samples(['A', 'B', 'A', 'A'], pseudocount=6)
 
 	e = from_json(d.to_json())
-	assert_equal(e.name, "DiscreteDistribution")
-	assert_equal(e.parameters[0], {'A': 0.5625, 'B': 0.4375})
+	assert e.name == "DiscreteDistribution"
+	assert e.parameters[0] == {'A': 0.5625, 'B': 0.4375}
 
-@with_setup(setup, teardown)
 def test_lognormal():
 	d = LogNormalDistribution(5, 2)
-	assert_equal(round(d.log_probability(5), 4), -4.6585)
+	assert round(d.log_probability(5), 4) == -4.6585
 
 	d.fit([5.1, 5.03, 4.98, 5.05, 4.91, 5.2, 5.1, 5., 4.8, 5.21])
-	assert_equal(round(d.parameters[0], 4), 1.6167)
-	assert_equal(round(d.parameters[1], 4), 0.0237)
+	assert round(d.parameters[0], 4) == 1.6167
+	assert round(d.parameters[1], 4) == 0.0237
 
 	d.summarize([5.1, 5.03, 4.98, 5.05])
 	d.summarize([4.91, 5.2, 5.1])
 	d.summarize([5., 4.8, 5.21])
 	d.from_summaries()
 
-	assert_equal(round(d.parameters[0], 4), 1.6167)
-	assert_equal(round(d.parameters[1], 4), 0.0237)
+	assert round(d.parameters[0], 4) == 1.6167
+	assert round(d.parameters[1], 4) == 0.0237
 
 	e = Distribution.from_json(d.to_json())
-	assert_equal(e.name, "LogNormalDistribution")
-	assert_equal(round(e.parameters[0], 4), 1.6167)
-	assert_equal(round(e.parameters[1], 4), 0.0237)
+	assert e.name == "LogNormalDistribution"
+	assert round(e.parameters[0], 4) == 1.6167
+	assert round(e.parameters[1], 4) == 0.0237
 
 	f = pickle.loads(pickle.dumps(e))
-	assert_equal(f.name, "LogNormalDistribution")
-	assert_equal(round(f.parameters[0], 4), 1.6167)
-	assert_equal(round(f.parameters[1], 4), 0.0237)
+	assert f.name == "LogNormalDistribution"
+	assert round(f.parameters[0], 4) == 1.6167
+	assert round(f.parameters[1], 4) == 0.0237
 
 
 def test_distributions_lognormal_random_sample():
 	d = LogNormalDistribution(0, 1)
 
-	x = numpy.array([1.55461432,  0.71829843, 11.36764528,  0.77717313,  
+	x = numpy.array([1.55461432,  0.71829843, 11.36764528,  0.77717313,
 		1.11584263])
 
 	assert_array_almost_equal(d.sample(5, random_state=5), x)
-	assert_raises(AssertionError, assert_array_almost_equal, d.sample(5), x)
+	with pytest.raises(AssertionError):
+		assert_array_almost_equal(d.sample(5), x)
 
 
-@with_setup(setup, teardown)
 def test_gamma():
 	d = GammaDistribution(5, 2)
-	assert_equal(round(d.log_probability(4), 4), -2.1671)
+	assert round(d.log_probability(4), 4) == -2.1671
 
 	d.fit([2.3, 4.3, 2.7, 2.3, 3.1, 3.2, 3.4, 3.1, 2.9, 2.8])
-	assert_equal(round(d.parameters[0], 4), 31.8806)
-	assert_equal(round(d.parameters[1], 4), 10.5916)
+	assert round(d.parameters[0], 4) == 31.8806
+	assert round(d.parameters[1], 4) == 10.5916
 
 	d = GammaDistribution(2, 7)
-	assert_not_equal(round(d.log_probability(4), 4), -2.1671)
+	assert round(d.log_probability(4), 4) != -2.1671
 
 	d.summarize([2.3, 4.3, 2.7])
 	d.summarize([2.3, 3.1, 3.2])
 	d.summarize([3.4, 3.1, 2.9, 2.8])
 	d.from_summaries()
 
-	assert_equal(round(d.parameters[0], 4), 31.8806)
-	assert_equal(round(d.parameters[1], 4), 10.5916)
+	assert round(d.parameters[0], 4) == 31.8806
+	assert round(d.parameters[1], 4) == 10.5916
 
 	e = Distribution.from_json(d.to_json())
-	assert_equal(e.name, "GammaDistribution")
-	assert_equal(round(e.parameters[0], 4), 31.8806)
-	assert_equal(round(e.parameters[1], 4), 10.5916)
+	assert e.name == "GammaDistribution"
+	assert round(e.parameters[0], 4) == 31.8806
+	assert round(e.parameters[1], 4) == 10.5916
 
 	f = pickle.loads(pickle.dumps(e))
-	assert_equal(f.name, "GammaDistribution")
-	assert_equal(round(f.parameters[0], 4), 31.8806)
-	assert_equal(round(f.parameters[1], 4), 10.5916)
+	assert f.name == "GammaDistribution"
+	assert round(f.parameters[0], 4) == 31.8806
+	assert round(f.parameters[1], 4) == 10.5916
 
 
 def test_distributions_gamma_random_sample():
@@ -623,34 +617,34 @@
 	x = numpy.array([0.049281, 0.042733, 0.238545, 0.773426, 0.088091])
 
 	assert_array_almost_equal(d.sample(5, random_state=5), x)
-	assert_raises(AssertionError, assert_array_almost_equal, d.sample(5), x)
+	with pytest.raises(AssertionError):
+		assert_array_almost_equal(d.sample(5), x)
 
 
-@with_setup(setup, teardown)
 def test_exponential():
 	d = ExponentialDistribution(3)
-	assert_equal(round(d.log_probability(8), 4), -22.9014)
+	assert round(d.log_probability(8), 4) == -22.9014
 
 	d.fit([2.7, 2.9, 3.8, 1.9, 2.7, 1.6, 1.3, 1.0, 1.9])
-	assert_equal(round(d.parameters[0], 4), 0.4545)
+	assert round(d.parameters[0], 4) == 0.4545
 
 	d = ExponentialDistribution(4)
-	assert_not_equal(round(d.log_probability(8), 4), -22.9014)
+	assert round(d.log_probability(8), 4) != -22.9014
 
 	d.summarize([2.7, 2.9, 3.8])
 	d.summarize([1.9, 2.7, 1.6])
 	d.summarize([1.3, 1.0, 1.9])
 	d.from_summaries()
 
-	assert_equal(round(d.parameters[0], 4), 0.4545)
+	assert round(d.parameters[0], 4) == 0.4545
 
 	e = Distribution.from_json(d.to_json())
-	assert_equal(e.name, "ExponentialDistribution")
-	assert_equal(round(e.parameters[0], 4), 0.4545)
+	assert e.name == "ExponentialDistribution"
+	assert round(e.parameters[0], 4) == 0.4545
 
 	f = pickle.loads(pickle.dumps(e))
-	assert_equal(f.name, "ExponentialDistribution")
-	assert_equal(round(f.parameters[0], 4), 0.4545)
+	assert f.name == "ExponentialDistribution"
+	assert round(f.parameters[0], 4) == 0.4545
 
 
 def test_distributions_exponential_random_sample():
@@ -659,40 +653,40 @@
 	x = numpy.array([0.03586, 0.292267, 0.033083, 0.358359, 0.095748])
 
 	assert_array_almost_equal(d.sample(5, random_state=5), x)
-	assert_raises(AssertionError, assert_array_almost_equal, d.sample(5), x)
+	with pytest.raises(AssertionError):
+		assert_array_almost_equal(d.sample(5), x)
 
 
-@with_setup(setup, teardown)
 def test_poisson():
 	d = PoissonDistribution(5)
 
 	assert_almost_equal(d.log_probability(5), -1.7403021806115442)
 	assert_almost_equal(d.log_probability(10), -4.0100334487345126)
 	assert_almost_equal(d.log_probability(1), -3.3905620875658995)
-	assert_equal(d.log_probability(-1), float("-inf"))
+	assert d.log_probability(-1) == float("-inf")
 
 	d = PoissonDistribution(0)
 
-	assert_equal(d.log_probability(1), float("-inf"))
-	assert_equal(d.log_probability(7), float("-inf"))
+	assert d.log_probability(1) == float("-inf")
+	assert d.log_probability(7) == float("-inf")
 
 	d.fit([1, 6, 4, 9, 1])
-	assert_equal(d.parameters[0], 4.2)
+	assert d.parameters[0] == 4.2
 
 	d.fit([1, 6, 4, 9, 1], weights=[0, 0, 0, 1, 0])
-	assert_equal(d.parameters[0], 9)
+	assert d.parameters[0] == 9
 
 	d.fit([1, 6, 4, 9, 1], weights=[1, 0, 0, 1, 0])
-	assert_equal(d.parameters[0], 5)
+	assert d.parameters[0] == 5
 
 	assert_almost_equal(d.log_probability(5), -1.7403021806115442)
 	assert_almost_equal(d.log_probability(10), -4.0100334487345126)
 	assert_almost_equal(d.log_probability(1), -3.3905620875658995)
-	assert_equal(d.log_probability(-1), float("-inf"))
+	assert d.log_probability(-1) == float("-inf")
 
 	e = pickle.loads(pickle.dumps(d))
-	assert_equal(e.name, "PoissonDistribution")
-	assert_equal(e.parameters[0], 5)
+	assert e.name == "PoissonDistribution"
+	assert e.parameters[0] == 5
 
 
 def test_distributions_poisson_random_sample():
@@ -701,14 +695,15 @@
 	x = numpy.array([0, 1, 2, 2, 0])
 
 	assert_array_almost_equal(d.sample(5, random_state=5), x)
-	assert_raises(AssertionError, assert_array_almost_equal, d.sample(5), x)
+	with pytest.raises(AssertionError):
+		assert_array_almost_equal(d.sample(5), x)
 
 def test_beta():
 	"""Test pickling of beta distribution."""
 	d = BetaDistribution(2, 3)
 	e = pickle.loads(pickle.dumps(d))
-	assert_equal(e.name, "BetaDistribution")
-	assert_equal(e.parameters, [2, 3])
+	assert e.name == "BetaDistribution"
+	assert e.parameters == [2, 3]
 
 def test_distributions_beta_random_sample():
 	d = BetaDistribution(1, 1)
@@ -716,45 +711,45 @@
 	x = numpy.array([0.612564, 0.098563, 0.735983, 0.583171, 0.69296 ])
 
 	assert_array_almost_equal(d.sample(5, random_state=5), x)
-	assert_raises(AssertionError, assert_array_almost_equal, d.sample(5), x)
+	with pytest.raises(AssertionError):
+		assert_array_almost_equal(d.sample(5), x)
 
-@with_setup(setup, teardown)
 def test_gaussian_kernel():
 	d = GaussianKernelDensity([0, 4, 3, 5, 7, 4, 2])
-	assert_equal(round(d.log_probability(3.3), 4), -1.7042)
+	assert round(d.log_probability(3.3), 4) == -1.7042
 
 	d.fit([1, 6, 8, 3, 2, 4, 7, 2])
-	assert_equal(round(d.log_probability(1.2), 4), -2.0237)
+	assert round(d.log_probability(1.2), 4) == -2.0237
 
 	d.fit([1, 0, 108], weights=[2., 3., 278.])
-	assert_equal(round(d.log_probability(110), 4), -2.9368)
-	assert_equal(round(d.log_probability(0), 4), -5.1262)
+	assert round(d.log_probability(110), 4) == -2.9368
+	assert round(d.log_probability(0), 4) == -5.1262
 
 	d.summarize([1, 6, 8, 3])
 	d.summarize([2, 4, 7])
 	d.summarize([2])
 	d.from_summaries()
-	assert_equal(round(d.log_probability(1.2), 4), -2.0237)
+	assert round(d.log_probability(1.2), 4) == -2.0237
 
 	d.summarize([1, 0, 108], weights=[2., 3., 278.])
 	d.from_summaries()
-	assert_equal(round(d.log_probability(110), 4), -2.9368)
-	assert_equal(round(d.log_probability(0), 4), -5.1262)
+	assert round(d.log_probability(110), 4) == -2.9368
+	assert round(d.log_probability(0), 4) == -5.1262
 
 	d.freeze()
 	d.fit([1, 3, 5, 4, 6, 7, 3, 4, 2])
-	assert_equal(round(d.log_probability(110), 4), -2.9368)
-	assert_equal(round(d.log_probability(0), 4), -5.1262)
+	assert round(d.log_probability(110), 4) == -2.9368
+	assert round(d.log_probability(0), 4) == -5.1262
 
 	e = Distribution.from_json(d.to_json())
-	assert_equal(e.name, "GaussianKernelDensity")
-	assert_equal(round(e.log_probability(110), 4), -2.9368)
-	assert_equal(round(e.log_probability(0), 4), -5.1262)
+	assert e.name == "GaussianKernelDensity"
+	assert round(e.log_probability(110), 4) == -2.9368
+	assert round(e.log_probability(0), 4) == -5.1262
 
 	f = pickle.loads(pickle.dumps(e))
-	assert_equal(f.name, "GaussianKernelDensity")
-	assert_equal(round(f.log_probability(110), 4), -2.9368)
-	assert_equal(round(f.log_probability(0), 4), -5.1262)
+	assert f.name == "GaussianKernelDensity"
+	assert round(f.log_probability(110), 4) == -2.9368
+	assert round(f.log_probability(0), 4) == -5.1262
 
 
 def test_distributions_gaussian_kernel_random_sample():
@@ -763,34 +758,34 @@
 	x = numpy.array([5.367586, 2.574708, 2.114238, 2.170925, 4.596907])
 
 	assert_array_almost_equal(d.sample(5, random_state=5), x)
-	assert_raises(AssertionError, assert_array_almost_equal, d.sample(5), x)
+	with pytest.raises(AssertionError):
+		assert_array_almost_equal(d.sample(5), x)
 
 
-@with_setup(setup, teardown)
 def test_triangular_kernel():
 	d = TriangleKernelDensity([1, 6, 3, 4, 5, 2])
-	assert_equal(round(d.log_probability(6.5), 4), -2.4849)
+	assert round(d.log_probability(6.5), 4) == -2.4849
 
 	d = TriangleKernelDensity([1, 8, 100])
-	assert_not_equal(round(d.log_probability(6.5), 4), -2.4849)
+	assert round(d.log_probability(6.5), 4) != -2.4849
 
 	d.summarize([1, 6])
 	d.summarize([3, 4, 5])
 	d.summarize([2])
 	d.from_summaries()
-	assert_equal(round(d.log_probability(6.5), 4), -2.4849)
+	assert round(d.log_probability(6.5), 4) == -2.4849
 
 	d.freeze()
 	d.fit([1, 4, 6, 7, 3, 5, 7, 8, 3, 3, 4])
-	assert_equal(round(d.log_probability(6.5), 4), -2.4849)
+	assert round(d.log_probability(6.5), 4) == -2.4849
 
 	e = Distribution.from_json(d.to_json())
-	assert_equal(e.name, "TriangleKernelDensity")
-	assert_equal(round(e.log_probability(6.5), 4), -2.4849)
+	assert e.name == "TriangleKernelDensity"
+	assert round(e.log_probability(6.5), 4) == -2.4849
 
 	f = pickle.loads(pickle.dumps(e))
-	assert_equal(f.name, "TriangleKernelDensity")
-	assert_equal(round(f.log_probability(6.5), 4), -2.4849)
+	assert f.name == "TriangleKernelDensity"
+	assert round(f.log_probability(6.5), 4) == -2.4849
 
 
 def test_distributions_triangle_kernel_random_sample():
@@ -799,36 +794,36 @@
 	x = numpy.array([4.118801, 2.31576 , 4.018591, 1.770455, 4.612734])
 
 	assert_array_almost_equal(d.sample(5, random_state=5), x)
-	assert_raises(AssertionError, assert_array_almost_equal, d.sample(5), x)
+	with pytest.raises(AssertionError):
+		assert_array_almost_equal(d.sample(5), x)
 
 
-@with_setup(setup, teardown)
 def test_uniform_kernel():
 	d = UniformKernelDensity([1, 3, 5, 6, 2, 2, 3, 2, 2])
 
-	assert_equal(round(d.log_probability(2.2), 4), -0.4055)
-	assert_equal(round(d.log_probability(6.2), 4), -2.1972)
-	assert_equal(d.log_probability(10), float('-inf'))
+	assert round(d.log_probability(2.2), 4) == -0.4055
+	assert round(d.log_probability(6.2), 4) == -2.1972
+	assert d.log_probability(10) == float('-inf')
 
 	d = UniformKernelDensity([1, 100, 200])
-	assert_not_equal(round(d.log_probability(2.2), 4), -0.4055)
-	assert_not_equal(round(d.log_probability(6.2), 4), -2.1972)
+	assert round(d.log_probability(2.2), 4) != -0.4055
+	assert round(d.log_probability(6.2), 4) != -2.1972
 
 	d.summarize([1, 3, 5, 6, 2])
 	d.summarize([2, 3, 2, 2])
 	d.from_summaries()
-	assert_equal(round(d.log_probability(2.2), 4), -0.4055)
-	assert_equal(round(d.log_probability(6.2), 4), -2.1972)
+	assert round(d.log_probability(2.2), 4) == -0.4055
+	assert round(d.log_probability(6.2), 4) == -2.1972
 
 	e = Distribution.from_json(d.to_json())
-	assert_equal(e.name, "UniformKernelDensity")
-	assert_equal(round(e.log_probability(2.2), 4), -0.4055)
-	assert_equal(round(e.log_probability(6.2), 4), -2.1972)
+	assert e.name == "UniformKernelDensity"
+	assert round(e.log_probability(2.2), 4) == -0.4055
+	assert round(e.log_probability(6.2), 4) == -2.1972
 
 	f = pickle.loads(pickle.dumps(e))
-	assert_equal(e.name, "UniformKernelDensity")
-	assert_equal(round(f.log_probability(2.2), 4), -0.4055)
-	assert_equal(round(f.log_probability(6.2), 4), -2.1972)
+	assert f.name == "UniformKernelDensity"
+	assert round(f.log_probability(2.2), 4) == -0.4055
+	assert round(f.log_probability(6.2), 4) == -2.1972
 
 
 def test_distributions_uniform_kernel_random_sample():
@@ -837,57 +832,57 @@
 	x = numpy.array([4.223488, 2.531816, 4.036836, 1.593601, 4.375442])
 
 	assert_array_almost_equal(d.sample(5, random_state=5), x)
-	assert_raises(AssertionError, assert_array_almost_equal, d.sample(5), x)
+	with pytest.raises(AssertionError):
+		assert_array_almost_equal(d.sample(5), x)
 
 
-@with_setup(setup, teardown)
 def test_bernoulli():
 	d = BernoulliDistribution(0.6)
-	assert_equal(d.probability(0), 0.4)
-	assert_equal(d.probability(1), 0.6)
-	assert_equal(d.parameters[0], 1-d.probability(0))
-	assert_equal(d.parameters[0], d.probability(1))
+	assert d.probability(0) == 0.4
+	assert d.probability(1) == 0.6
+	assert d.parameters[0] == 1-d.probability(0)
+	assert d.parameters[0] == d.probability(1)
 
 	d.fit([0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0])
-	assert_not_equal(d.probability(1), 1.0)
-	assert_equal(d.probability(0), 1.0)
+	assert d.probability(1) != 1.0
+	assert d.probability(0) == 1.0
 
 	a = [0.0, 0.0, 0.0]
 	b = [1.0, 1.0, 1.0]
 	c = [1.0, 1.0, 1.0]
 	d.summarize(a)
 	d.from_summaries()
-	assert_equal(d.probability(0), 1)
-	assert_equal(d.probability(1), 0)
+	assert d.probability(0) == 1
+	assert d.probability(1) == 0
 
 	d.summarize(a)
 	d.summarize(b)
 	d.from_summaries()
-	assert_equal(d.probability(0), 0.5)
-	assert_equal(d.probability(1), 0.5)
-	assert_equal(d.parameters[0], d.probability(0))
-	assert_equal(d.parameters[0], d.probability(1))
+	assert d.probability(0) == 0.5
+	assert d.probability(1) == 0.5
+	assert d.parameters[0] == d.probability(0)
+	assert d.parameters[0] == d.probability(1)
 
 	d.summarize(a)
 	d.summarize(b)
 	d.summarize(c)
 	d.from_summaries()
-	assert_equal(round(d.probability(0), 4), 0.3333)
-	assert_equal(round(d.probability(1), 4), 0.6667)
-	assert_equal(d.parameters[0], d.probability(1))
+	assert round(d.probability(0), 4) == 0.3333
+	assert round(d.probability(1), 4) == 0.6667
+	assert d.parameters[0] == d.probability(1)
 
 	d = BernoulliDistribution.from_samples([0.0, 0.0, 0.0, 0.0, 0.0, 1.0])
-	assert_equal(round(d.probability(0), 4), 0.8333)
-	assert_equal(round(d.probability(1), 4), 0.1667)
+	assert round(d.probability(0), 4) == 0.8333
+	assert round(d.probability(1), 4) == 0.1667
 	assert_almost_equal(d.parameters[0], d.probability(1))
 
 	e = Distribution.from_json(d.to_json())
-	assert_equal(e.name, "BernoulliDistribution")
-	assert_equal(round(e.parameters[0], 4), 0.1667)
+	assert e.name == "BernoulliDistribution"
+	assert round(e.parameters[0], 4) == 0.1667
 
 	f = pickle.loads(pickle.dumps(e))
-	assert_equal(f.name, "BernoulliDistribution")
-	assert_equal(round(f.parameters[0], 4), 0.1667)
+	assert f.name == "BernoulliDistribution"
+	assert round(f.parameters[0], 4) == 0.1667
 
 def test_distributions_uniform_kernel_random_sample():
 	d = BernoulliDistribution(0.2)
@@ -896,48 +891,48 @@
 			0, 0, 0, 0, 0])
 
 	assert_array_equal(d.sample(20, random_state=5), x)
-	assert_raises(AssertionError, assert_array_equal, d.sample(20), x)
+	with pytest.raises(AssertionError):
+		assert_array_equal(d.sample(20), x)
 
-@with_setup(setup, teardown)
 def test_independent():
 	d = IndependentComponentsDistribution(
 		[NormalDistribution(5, 2), ExponentialDistribution(2)])
 
-	assert_equal(round(d.log_probability((4, 1)), 4), -3.0439)
-	assert_equal(round(d.log_probability((100, 0.001)), 4), -1129.0459)
+	assert round(d.log_probability((4, 1)), 4) == -3.0439
+	assert round(d.log_probability((100, 0.001)), 4) == -1129.0459
 
 	d = IndependentComponentsDistribution([NormalDistribution(5, 2),
 										   ExponentialDistribution(2)],
 										  weights=[18., 1.])
 
-	assert_equal(round(d.log_probability((4, 1)), 4), -32.5744)
-	assert_equal(round(d.log_probability((100, 0.001)), 4), -20334.5764)
+	assert round(d.log_probability((4, 1)), 4) == -32.5744
+	assert round(d.log_probability((100, 0.001)), 4) == -20334.5764
 
 	d.fit([(5, 1), (5.2, 1.7), (4.7, 1.9), (4.9, 2.4), (4.5, 1.2)])
 
-	assert_equal(round(d.parameters[0][0].parameters[0], 4), 4.86)
-	assert_equal(round(d.parameters[0][0].parameters[1], 4), 0.2417)
-	assert_equal(round(d.parameters[0][1].parameters[0], 4), 0.6098)
+	assert round(d.parameters[0][0].parameters[0], 4) == 4.86
+	assert round(d.parameters[0][0].parameters[1], 4) == 0.2417
+	assert round(d.parameters[0][1].parameters[0], 4) == 0.6098
 
 	d = IndependentComponentsDistribution([NormalDistribution(5, 2),
 										   UniformDistribution(0, 10)])
 	d.fit([(0, 0), (5, 0), (3, 0), (5, -5), (7, 0),
 		   (3, 0), (4, 0), (5, 0), (2, 20)], inertia=0.5)
 
-	assert_equal(round(d.parameters[0][0].parameters[0], 4), 4.3889)
-	assert_equal(round(d.parameters[0][0].parameters[1], 4), 1.9655)
+	assert round(d.parameters[0][0].parameters[0], 4) == 4.3889
+	assert round(d.parameters[0][0].parameters[1], 4) == 1.9655
 
-	assert_equal(d.parameters[0][1].parameters[0], -2.5)
-	assert_equal(d.parameters[0][1].parameters[1], 15)
+	assert d.parameters[0][1].parameters[0] == -2.5
+	assert d.parameters[0][1].parameters[1] == 15
 
 	d.fit([(0, 0), (5, 0), (3, 0), (5, -5), (7, 0),
 		   (3, 0), (4, 0), (5, 0), (2, 20)], inertia=0.75)
 
-	assert_not_equal(round(d.parameters[0][0].parameters[0], 4), 4.3889)
-	assert_not_equal(round(d.parameters[0][0].parameters[1], 4), 1.9655)
+	assert round(d.parameters[0][0].parameters[0], 4) != 4.3889
+	assert round(d.parameters[0][0].parameters[1], 4) != 1.9655
 
-	assert_not_equal(d.parameters[0][1].parameters[0], -2.5)
-	assert_not_equal(d.parameters[0][1].parameters[1], 15)
+	assert d.parameters[0][1].parameters[0] != -2.5
+	assert d.parameters[0][1].parameters[1] != 15
 
 	d = IndependentComponentsDistribution([NormalDistribution(5, 2),
 										   UniformDistribution(0, 10)])
@@ -947,45 +942,45 @@
 	d.summarize([(3, 0), (4, 0), (5, 0), (2, 20)])
 	d.from_summaries(inertia=0.5)
 
-	assert_equal(round(d.parameters[0][0].parameters[0], 4), 4.3889)
-	assert_equal(round(d.parameters[0][0].parameters[1], 4), 1.9655)
+	assert round(d.parameters[0][0].parameters[0], 4) == 4.3889
+	assert round(d.parameters[0][0].parameters[1], 4) == 1.9655
 
-	assert_equal(d.parameters[0][1].parameters[0], -2.5)
-	assert_equal(d.parameters[0][1].parameters[1], 15)
+	assert d.parameters[0][1].parameters[0] == -2.5
+	assert d.parameters[0][1].parameters[1] == 15
 
 	d.freeze()
 	d.fit([(1, 7), (7, 2), (2, 4), (2, 4), (1, 4)])
 
-	assert_equal(round(d.parameters[0][0].parameters[0], 4), 4.3889)
-	assert_equal(round(d.parameters[0][0].parameters[1], 4), 1.9655)
+	assert round(d.parameters[0][0].parameters[0], 4) == 4.3889
+	assert round(d.parameters[0][0].parameters[1], 4) == 1.9655
 
-	assert_equal(d.parameters[0][1].parameters[0], -2.5)
-	assert_equal(d.parameters[0][1].parameters[1], 15)
+	assert d.parameters[0][1].parameters[0] == -2.5
+	assert d.parameters[0][1].parameters[1] == 15
 
 	e = Distribution.from_json(d.to_json())
-	assert_equal(e.name, "IndependentComponentsDistribution")
+	assert e.name == "IndependentComponentsDistribution"
 
-	assert_equal(round(e.parameters[0][0].parameters[0], 4), 4.3889)
-	assert_equal(round(e.parameters[0][0].parameters[1], 4), 1.9655)
+	assert round(e.parameters[0][0].parameters[0], 4) == 4.3889
+	assert round(e.parameters[0][0].parameters[1], 4) == 1.9655
 
-	assert_equal(e.parameters[0][1].parameters[0], -2.5)
-	assert_equal(e.parameters[0][1].parameters[1], 15)
+	assert e.parameters[0][1].parameters[0] == -2.5
+	assert e.parameters[0][1].parameters[1] == 15
 
 	f = pickle.loads(pickle.dumps(e))
-	assert_equal(e.name, "IndependentComponentsDistribution")
+	assert f.name == "IndependentComponentsDistribution"
 
-	assert_equal(round(f.parameters[0][0].parameters[0], 4), 4.3889)
-	assert_equal(round(f.parameters[0][0].parameters[1], 4), 1.9655)
+	assert round(f.parameters[0][0].parameters[0], 4) == 4.3889
+	assert round(f.parameters[0][0].parameters[1], 4) == 1.9655
 
-	assert_equal(f.parameters[0][1].parameters[0], -2.5)
-	assert_equal(f.parameters[0][1].parameters[1], 15)
+	assert f.parameters[0][1].parameters[0] == -2.5
+	assert f.parameters[0][1].parameters[1] == 15
 
 	X = numpy.array([[0.5, 0.2, 0.7],
-		          [0.3, 0.1, 0.9],
-		          [0.4, 0.3, 0.8],
-		          [0.3, 0.3, 0.9],
-		          [0.3, 0.2, 0.6],
-		          [0.5, 0.2, 0.8]])
+				  [0.3, 0.1, 0.9],
+				  [0.4, 0.3, 0.8],
+				  [0.3, 0.3, 0.9],
+				  [0.3, 0.2, 0.6],
+				  [0.5, 0.2, 0.8]])
 
 	d = IndependentComponentsDistribution.from_samples(X,
 		distributions=NormalDistribution)
@@ -1031,26 +1026,27 @@
 					 [9.861542, 2.067192, 0.033083, 2.644041]])
 
 	assert_array_almost_equal(d.sample(3, random_state=5), x)
-	assert_raises(AssertionError, assert_array_almost_equal, d.sample(5), x)
+	with pytest.raises(AssertionError):
+		assert_array_almost_equal(d.sample(5), x)
 
 
 def test_conditional():
 	phditis = DiscreteDistribution({True: 0.01, False: 0.99})
 	test_result = ConditionalProbabilityTable(
-		[[True,  True,  0.95],
+		[[True,  True,  0.95],
 		 [True,  False, 0.05],
-		 [False, True,  0.05],
+		 [False, True,  0.05],
 		 [False, False, 0.95]], [phditis])
 
 	assert discrete_equality(test_result.marginal(),
 							 DiscreteDistribution({False: 0.941, True: 0.059}))
 
 
-def setup_cpt():
+@pytest.fixture
+def cpt():
 	guest = DiscreteDistribution({'A': 1. / 3, 'B': 1. / 3, 'C': 1. / 3})
 	prize = DiscreteDistribution({'A': 1. / 3, 'B': 1. / 3, 'C': 1. / 3})
 
-	global monty
 	monty = ConditionalProbabilityTable(
 		[['A', 'A', 'A', 0.0],
 		 ['A', 'A', 'B', 0.5],
@@ -1080,9 +1076,8 @@
 		 ['C', 'C', 'B', 0.5],
 		 ['C', 'C', 'C', 0.0]], [guest, prize])
 
-	global X
 	X = [['A', 'A', 'C'],
-	 	['A', 'A', 'B'],
+		['A', 'A', 'B'],
 		['A', 'A', 'C'],
 		['A', 'A', 'B'],
 		['A', 'A', 'A'],
@@ -1113,10 +1108,8 @@
 		['C', 'C', 'C'],
 		['C', 'C', 'C']]
 
-
-	global X_nan
 	X_nan = [['nan', 'A', 'C'],
-	 	['A', 'A', 'nan'],
+		['A', 'A', 'nan'],
 		['A', 'nan', 'C'],
 		['A', 'A', 'B'],
 		['A', 'A', 'A'],
@@ -1146,11 +1139,12 @@
 		['C', 'nan', 'C'],
 		['C', 'C', 'C'],
 		['C', 'C', 'C']]
+	return monty, X, X_nan
 
 
-@with_setup(setup_cpt)
-def test_distributions_cpt_initialization():
-	assert_equal(monty.name, "ConditionalProbabilityTable")
+def test_distributions_cpt_initialization(cpt):
+	monty, X, X_nan = cpt
+	assert monty.name == "ConditionalProbabilityTable"
 	assert_array_equal(monty.parameters[0], [['A', 'A', 'A', 0.0],
 		 ['A', 'A', 'B', 0.5],
 		 ['A', 'A', 'C', 0.5],
@@ -1179,52 +1173,52 @@
 		 ['C', 'C', 'B', 0.5],
 		 ['C', 'C', 'C', 0.0]])
 
-	assert_equal(monty.n_columns, 3)
-	assert_equal(monty.m, 2)
+	assert monty.n_columns == 3
+	assert monty.m == 2
 
 
-@with_setup(setup_cpt)
-def test_distributions_cpt_log_probability():
-	assert_equal(monty.log_probability(('A', 'B', 'C')), 0.)
-	assert_equal(monty.log_probability(('C', 'B', 'A')), 0.)
-	assert_equal(monty.log_probability(('C', 'C', 'C')), -inf)
-	assert_equal(monty.log_probability(('A', 'A', 'A')), -inf)
-	assert_equal(monty.log_probability(('B', 'A', 'C')), 0.)
-	assert_equal(monty.log_probability(('C', 'A', 'B')), 0.)
-
-
-@with_setup(setup_cpt)
-def test_distributions_cpt_nan_log_probability():
-	assert_equal(monty.log_probability(('A', 'nan', 'C')), 0.)
-	assert_equal(monty.log_probability(('nan', 'nan', 'C')), 0.)
-	assert_equal(monty.log_probability(('A', 'nan', 'nan')), 0.)
-	assert_equal(monty.log_probability(('nan', 'nan', 'nan')), 0.)
-	assert_equal(monty.log_probability(('nan', 'B', 'C')), 0.)
-	assert_equal(monty.log_probability(('A', 'B', 'nan')), 0.)
-
-
-@with_setup(setup_cpt)
-def test_distributions_cpt_probability():
-	assert_equal(monty.probability(('A', 'B', 'C')), 1.)
-	assert_equal(monty.probability(('C', 'B', 'A')), 1.)
-	assert_equal(monty.probability(('C', 'C', 'C')), 0.)
-	assert_equal(monty.probability(('A', 'A', 'A')), 0.)
-	assert_equal(monty.probability(('B', 'A', 'C')), 1.)
-	assert_equal(monty.probability(('C', 'A', 'B')), 1.)
-
-
-@with_setup(setup_cpt)
-def test_distributions_cpt_nan_probability():
-	assert_equal(monty.probability(('A', 'nan', 'C')), 1.)
-	assert_equal(monty.probability(('nan', 'nan', 'C')), 1.)
-	assert_equal(monty.probability(('A', 'nan', 'nan')), 1.)
-	assert_equal(monty.probability(('nan', 'nan', 'nan')), 1.)
-	assert_equal(monty.probability(('nan', 'B', 'C')), 1.)
-	assert_equal(monty.probability(('A', 'B', 'nan')), 1.)
+def test_distributions_cpt_log_probability(cpt):
+	monty, X, X_nan = cpt
+	assert monty.log_probability(('A', 'B', 'C')) == 0.
+	assert monty.log_probability(('C', 'B', 'A')) == 0.
+	assert monty.log_probability(('C', 'C', 'C')) == -inf
+	assert monty.log_probability(('A', 'A', 'A')) == -inf
+	assert monty.log_probability(('B', 'A', 'C')) == 0.
+	assert monty.log_probability(('C', 'A', 'B')) == 0.
+
+
+def test_distributions_cpt_nan_log_probability(cpt):
+	monty, X, X_nan = cpt
+	assert monty.log_probability(('A', 'nan', 'C')) == 0.
+	assert monty.log_probability(('nan', 'nan', 'C')) == 0.
+	assert monty.log_probability(('A', 'nan', 'nan')) == 0.
+	assert monty.log_probability(('nan', 'nan', 'nan')) == 0.
+	assert monty.log_probability(('nan', 'B', 'C')) == 0.
+	assert monty.log_probability(('A', 'B', 'nan')) == 0.
+
+
+def test_distributions_cpt_probability(cpt):
+	monty, X, X_nan = cpt
+	assert monty.probability(('A', 'B', 'C')) == 1.
+	assert monty.probability(('C', 'B', 'A')) == 1.
+	assert monty.probability(('C', 'C', 'C')) == 0.
+	assert monty.probability(('A', 'A', 'A')) == 0.
+	assert monty.probability(('B', 'A', 'C')) == 1.
+	assert monty.probability(('C', 'A', 'B')) == 1.
+
+
+def test_distributions_cpt_nan_probability(cpt):
+	monty, X, X_nan = cpt
+	assert monty.probability(('A', 'nan', 'C')) == 1.
+	assert monty.probability(('nan', 'nan', 'C')) == 1.
+	assert monty.probability(('A', 'nan', 'nan')) == 1.
+	assert monty.probability(('nan', 'nan', 'nan')) == 1.
+	assert monty.probability(('nan', 'B', 'C')) == 1.
+	assert monty.probability(('A', 'B', 'nan')) == 1.
 
 
-@with_setup(setup_cpt)
-def test_distributions_cpt_fit():
+def test_distributions_cpt_fit(cpt):
+	monty, X, X_nan = cpt
 	monty.fit(X)
 
 	assert_array_equal(monty.parameters[0],
@@ -1257,8 +1251,8 @@
 		['C', 'C', 'C', 0.75]])
 
 
-@with_setup(setup_cpt)
-def test_distributions_cpt_nan_fit():
+def test_distributions_cpt_nan_fit(cpt):
+	monty, X, X_nan = cpt
 	monty.fit(X_nan)
 
 	assert_array_equal(monty.parameters[0],
@@ -1291,8 +1285,8 @@
 		['C', 'C', 'C', 1.0]])
 
 
-@with_setup(setup_cpt)
-def test_distributions_cpt_exclusive_nan_fit():
+def test_distributions_cpt_exclusive_nan_fit(cpt):
+	monty, X, X_nan = cpt
 	X = [['nan', 'nan', 'nan'],
 		 ['nan', 'nan', 'nan'],
 		 ['nan', 'nan', 'nan'],
@@ -1331,8 +1325,8 @@
 		 ['C', 'C', 'C', 0.0]])
 
 
-@with_setup(setup_cpt)
-def test_distributions_cpt_weighted_fit():
+def test_distributions_cpt_weighted_fit(cpt):
+	monty, X, X_nan = cpt
 	weights = [1, 3, 2, 3, 7, 4, 2, 2, 2, 1, 1, 1, 1, 0, 0, 0, 1, 2, 1, 3, 1,
 		1, 1, 2, 1, 1, 1, 3, 1, 1, 1]
 
@@ -1400,11 +1394,12 @@
 	d = MultivariateGaussianDistribution(mu, cov)
 
 	x = numpy.array([[5.441227, 0.66913 ],
-			         [7.430771, 0.747908],
-			         [5.10961 , 2.582481]])
+					 [7.430771, 0.747908],
+					 [5.10961 , 2.582481]])
 
 	assert_array_almost_equal(d.sample(3, random_state=5), x)
-	assert_raises(AssertionError, assert_array_almost_equal, d.sample(5), x)
+	with pytest.raises(AssertionError):
+		assert_array_almost_equal(d.sample(5), x)
 
 
 def test_distributions_independent_random_sample():
@@ -1418,7 +1413,8 @@
 					 [9.861542, 2.067192, 0.033083, 2.644041]])
 
 	assert_array_almost_equal(d.sample(3, random_state=5), x)
-	assert_raises(AssertionError, assert_array_almost_equal, d.sample(5), x)
+	with pytest.raises(AssertionError):
+		assert_array_almost_equal(d.sample(5), x)
 
 def test_cpd_sampling():
 	d1 = DiscreteDistribution({"A": 0.1, "B": 0.9})
@@ -1458,12 +1454,13 @@
 		[["A", "A", 0.1], ["A", "B", 0.9], ["B", "A", 0.7], ["B", "B", 0.3]],
 		[d1])
 
-    # Not true with actual seed
+	# Not true with actual seed
 	# x = numpy.array(['B', 'A', 'B', 'B', 'A', 'B', 'A', 'A', 'B', 'A', 'A', 
-	# 	'B', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A'])
+	#	'B', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A'])
 
 	x = numpy.array(['B', 'A', 'B', 'A', 'A', 'B', 'A', 'A', 'A', 'A', 'B', 'A', 'A',
-           'B', 'A', 'A', 'A', 'A', 'A', 'A'])
+		   'B', 'A', 'A', 'A', 'A', 'A', 'A'])
 
 	assert_array_equal(d.sample(n=20, random_state=5), x)
-	assert_raises(AssertionError, assert_array_equal, d.sample(n=10), x)
+	with pytest.raises(AssertionError):
+		assert_array_equal(d.sample(n=10), x)
--- python-pomegranate.orig/tests/test_factor_graphs.py
+++ python-pomegranate/tests/test_factor_graphs.py
@@ -1,19 +1,5 @@
 from pomegranate import *
 
-from .tools import assert_equal
-
-def setup():
-    '''
-    No setup or teardown needs to be done in this case.
-    '''
-    pass
-
-
-def teardown():
-    '''
-    No setup or teardown needs to be done in this case.
-    '''
-    pass
 
 def test_json():
     d1 = DiscreteDistribution({"A": 0.1, "B": 0.9})
@@ -28,11 +14,11 @@
     bayes_net.bake()
     fg = bayes_net.graph
     also_fg = FactorGraph.from_json(fg.to_json())
-    assert_equal(fg.to_json(), also_fg.to_json())
-    assert_equal(len(fg.edges), len(also_fg.edges))
+    assert fg.to_json() == also_fg.to_json()
+    assert len(fg.edges) == len(also_fg.edges)
     for e1, e2 in zip(fg.edges, also_fg.edges):
-        assert_equal(e1[0], e2[0])
-        assert_equal(e1[1], e2[1])
+        assert e1[0] == e2[0]
+        assert e1[1] == e2[1]
 
 def test_robust_json():
     d1 = DiscreteDistribution({"A": 0.1, "B": 0.9})
@@ -47,8 +33,8 @@
     bayes_net.bake()
     fg = bayes_net.graph
     also_fg = from_json(fg.to_json())
-    assert_equal(fg.to_json(), also_fg.to_json())
-    assert_equal(len(fg.edges), len(also_fg.edges))
+    assert fg.to_json() == also_fg.to_json()
+    assert len(fg.edges) == len(also_fg.edges)
     for e1, e2 in zip(fg.edges, also_fg.edges):
-        assert_equal(e1[0], e2[0])
-        assert_equal(e1[1], e2[1])
+        assert e1[0] == e2[0]
+        assert e1[1] == e2[1]
--- python-pomegranate.orig/tests/test_gmm.py
+++ python-pomegranate/tests/test_gmm.py
@@ -2,12 +2,6 @@
 from pomegranate.io import DataGenerator
 from pomegranate.io import DataFrameGenerator
 
-from .tools import with_setup
-from .tools import assert_true
-from .tools import assert_equal
-from .tools import assert_greater
-from .tools import assert_raises
-from .tools import assert_not_equal
 from numpy.testing import assert_almost_equal
 from numpy.testing import assert_array_equal
 from numpy.testing import assert_array_almost_equal
@@ -16,31 +10,35 @@
 import random
 import pickle
 import numpy as np
+import pytest
 
 np.random.seed(0)
 random.seed(0)
 
-nan = numpy.nan
+nan = np.nan
+
 
 def setup_nothing():
 	pass
 
-def setup_multivariate_gaussian():
+
+@pytest.fixture
+def multivariate_gaussian():
 	"""
 	Set up a five component Gaussian mixture model, where each component
 	is a multivariate Gaussian distribution.
 	"""
 
-	global gmm
-
 	mu = np.arange(5)
 	cov = np.eye(5)
 
 	mgs = [MultivariateGaussianDistribution(mu*i, cov) for i in range(5)]
 	gmm = GeneralMixtureModel(mgs)
+	return gmm
 
 
-def setup_multivariate_mixed():
+@pytest.fixture
+def multivariate_mixed():
 	d11 = NormalDistribution(1, 1)
 	d12 = ExponentialDistribution(5)
 	d13 = LogNormalDistribution(0.5, 0.78)
@@ -55,52 +53,53 @@
 	d25 = PoissonDistribution(6)
 	d2 = IndependentComponentsDistribution([d21, d22, d23, d24, d25])
 
-	global gmm
 	gmm = GeneralMixtureModel([d1, d2])
+	return gmm
 
 
-def setup_univariate_gaussian():
+@pytest.fixture
+def univariate_gaussian():
 	"""
 	Set up a three component univariate Gaussian model.
 	"""
 
-	global gmm
 	gmm = GeneralMixtureModel([NormalDistribution(i*3, 1) for i in range(3)])
+	return gmm
 
 
-def setup_univariate_mixed():
+@pytest.fixture
+def univariate_mixed():
 	"""Set up a four component univariate mixed model."""
 
-	global gmm
 	d1 = ExponentialDistribution(5)
 	d2 = NormalDistribution(0, 1.2)
 	d3 = LogNormalDistribution(0.3, 1.4)
 	d4 = PoissonDistribution(5)
 	gmm = GeneralMixtureModel([d1, d2, d3, d4])
+	return gmm
 
 
-def setup_multivariate_discrete():
+@pytest.fixture
+def multivariate_discrete():
 	d1 = IndependentComponentsDistribution([DiscreteDistribution({'A':0.5, 'B':0.5}), 
-	                                        DiscreteDistribution({'0':0.2, '1':0.2, '2':0.2, '3':0.4})])
+											DiscreteDistribution({'0':0.2, '1':0.2, '2':0.2, '3':0.4})])
 	d2 = IndependentComponentsDistribution([DiscreteDistribution({'A':0.1, 'B':0.9}), 
-	                                        DiscreteDistribution({'0':0.1, '1':0.6, '2':0.1, '3':0.2})])
+											DiscreteDistribution({'0':0.1, '1':0.6, '2':0.1, '3':0.2})])
 
-
-	global gmm
 	gmm = GeneralMixtureModel([d1, d2], weights=np.array([0.4,0.6]))
+	return gmm
 
 
-def setup_multivariate_mixed_discrete_other():
+@pytest.fixture
+def multivariate_mixed_discrete_other():
 	d1 = IndependentComponentsDistribution([PoissonDistribution(4.0),
-	                                        DiscreteDistribution({'A':0.5, 'B':0.5}), 
-	                                        DiscreteDistribution({'0':0.2, '1':0.2, '2':0.2, '3':0.4})])
+											DiscreteDistribution({'A':0.5, 'B':0.5}), 
+											DiscreteDistribution({'0':0.2, '1':0.2, '2':0.2, '3':0.4})])
 	d2 = IndependentComponentsDistribution([PoissonDistribution(1.0),
-	                                        DiscreteDistribution({'A':0.1, 'B':0.9}), 
-	                                        DiscreteDistribution({'0':0.1, '1':0.6, '2':0.1, '3':0.2})])
-
-
-	global gmm
+											DiscreteDistribution({'A':0.1, 'B':0.9}), 
+											DiscreteDistribution({'0':0.1, '1':0.6, '2':0.1, '3':0.2})])
 	gmm = GeneralMixtureModel([d1, d2], weights=np.array([0.4,0.6]))
+	return gmm
 
 def teardown():
 	"""
@@ -110,8 +109,8 @@
 	pass
 
 
-@with_setup(setup_multivariate_gaussian, teardown)
-def test_gmm_multivariate_gaussian_log_probability():
+def test_gmm_multivariate_gaussian_log_probability(multivariate_gaussian):
+	gmm = multivariate_gaussian
 	X = numpy.array([[1.1, 2.7, 3.0, 4.8, 6.2],
 					[1.8, 2.1, 3.1, 5.2, 6.5],
 					[0.9, 2.2, 3.2, 5.0, 5.8],
@@ -128,8 +127,8 @@
 	assert_array_almost_equal(logp, logp_t)
 
 
-@with_setup(setup_multivariate_mixed, teardown)
-def test_gmm_multivariate_mixed_log_probability():
+def test_gmm_multivariate_mixed_log_probability(multivariate_mixed):
+	gmm = multivariate_mixed
 	X = numpy.array([[1.1, 2.7, 3.0, 4.8, 6.2],
 					[1.8, 2.1, 3.1, 5.2, 6.5],
 					[0.9, 2.2, 3.2, 5.0, 5.8],
@@ -146,8 +145,8 @@
 	assert_array_almost_equal(logp, logp_t)
 
 
-@with_setup(setup_univariate_gaussian, teardown)
-def test_gmm_univariate_gaussian_log_probability():
+def test_gmm_univariate_gaussian_log_probability(univariate_gaussian):
+	gmm = univariate_gaussian
 	X = np.array([[1.1], [2.7], [3.0], [4.8], [6.2]])
 	logp = [-2.35925975, -2.03120691, -1.99557605, -2.39638244, -2.03147258]
 	assert_array_almost_equal(gmm.log_probability(X), logp)
@@ -197,8 +196,8 @@
 	assert_array_almost_equal(gmm.log_probability(X, batch_size=2), logp)
 
 
-@with_setup(setup_univariate_mixed, teardown)
-def test_gmm_mixed_log_probability():
+def test_gmm_mixed_log_probability(univariate_mixed):
+	gmm = univariate_mixed
 	X = np.array([[1.1], [2.7], [3.0], [4.8], [6.2]])
 	logp = [-2.01561437061559, -2.7951359521294536, -2.8314639809821918,
 			-2.9108132001193265, -3.1959940375620945]
@@ -256,8 +255,8 @@
 	assert_array_almost_equal(gmm.log_probability(X, batch_size=2), logp)
 
 
-@with_setup(setup_multivariate_gaussian, teardown)
-def test_gmm_multivariate_gaussian_json():
+def test_gmm_multivariate_gaussian_json(multivariate_gaussian):
+	gmm = multivariate_gaussian
 	gmm_2 = GeneralMixtureModel.from_json(gmm.to_json())
 
 	X = np.array([[1.1, 2.7, 3.0, 4.8, 6.2]])
@@ -285,8 +284,8 @@
 	assert_almost_equal(gmm_2.log_probability(X).sum(), -10.7922, 4)
 
 
-@with_setup(setup_multivariate_mixed, teardown)
-def test_gmm_multivariate_mixed_json():
+def test_gmm_multivariate_mixed_json(multivariate_mixed):
+	gmm = multivariate_mixed
 	gmm2 = GeneralMixtureModel.from_json(gmm.to_json())
 
 	X = numpy.array([[1.1, 2.7, 3.0, 4.8, 6.2],
@@ -307,8 +306,8 @@
 	assert_array_almost_equal(logp1, logp2)
 
 
-@with_setup(setup_multivariate_gaussian, teardown)
-def test_gmm_multivariate_gaussian_robust_from_json():
+def test_gmm_multivariate_gaussian_robust_from_json(multivariate_gaussian):
+	gmm = multivariate_gaussian
 	gmm_2 = from_json(gmm.to_json())
 
 	X = np.array([[1.1, 2.7, 3.0, 4.8, 6.2]])
@@ -336,8 +335,8 @@
 	assert_almost_equal(gmm_2.log_probability(X).sum(), -10.7922, 4)
 
 
-@with_setup(setup_multivariate_mixed, teardown)
-def test_gmm_multivariate_mixed_robust_from_json():
+def test_gmm_multivariate_mixed_robust_from_json(multivariate_mixed):
+	gmm = multivariate_mixed
 	gmm2 = from_json(gmm.to_json())
 
 	X = numpy.array([[1.1, 2.7, 3.0, 4.8, 6.2],
@@ -358,22 +357,22 @@
 	assert_array_almost_equal(logp1, logp2)
 
 
-@with_setup(setup_multivariate_gaussian, teardown)
-def test_gmm_multivariate_gaussian_predict_log_proba():
+def test_gmm_multivariate_gaussian_predict_log_proba(multivariate_gaussian):
+	gmm = multivariate_gaussian
 	posterior = np.array([[-2.10001234e+01, -1.23402948e-04, -9.00012340e+00, -4.80001234e+01, -1.17000123e+02],
-                          [-2.30009115e+01, -9.11466556e-04, -7.00091147e+00, -4.40009115e+01, -1.11000911e+02]])
+						  [-2.30009115e+01, -9.11466556e-04, -7.00091147e+00, -4.40009115e+01, -1.11000911e+02]])
 
 	X = np.array([[2., 5., 7., 3., 2.],
-		          [1., 2., 5., 2., 5.]])
+				  [1., 2., 5., 2., 5.]])
 
 	assert_almost_equal(gmm.predict_log_proba(X), posterior, 4)
 	assert_almost_equal(numpy.exp(gmm.predict_log_proba(X)), gmm.predict_proba(X), 4)
 
 
-@with_setup(setup_multivariate_gaussian, teardown)
-def test_gmm_multivariate_gaussian_predict():
+def test_gmm_multivariate_gaussian_predict(multivariate_gaussian):
+	gmm = multivariate_gaussian
 	X = np.array([[2., 5., 7., 3., 2.],
-		          [1., 2., 5., 2., 5.],
+				  [1., 2., 5., 2., 5.],
 				  [2., 1., 8., 2., 1.],
 				  [4., 3., 8., 1., 2.]])
 
@@ -389,13 +388,13 @@
 	gmm = GeneralMixtureModel([d1, d2])
 
 	X = np.array([[0.1,  0.7],
-		          [1.8,  2.1],
-		          [-0.9, -1.2],
-		          [-0.0,  0.2],
-		          [1.4,  2.9],
-		          [1.8,  2.5],
-		          [1.4,  3.1],
-		          [1.0,  1.0]])
+				  [1.8,  2.1],
+				  [-0.9, -1.2],
+				  [-0.0,  0.2],
+				  [1.4,  2.9],
+				  [1.8,  2.5],
+				  [1.4,  3.1],
+				  [1.0,  1.0]])
 
 	_, history = gmm.fit(X, return_history=True)
 	total_improvement = history.total_improvement[-1]
@@ -403,136 +402,114 @@
 	assert_almost_equal(total_improvement, 15.242416, 4)
 
 
-@with_setup(setup_multivariate_gaussian, teardown)
-def test_gmm_multivariate_gaussian_fit_iterations():
-	numpy.random.seed(0)
-	X = numpy.concatenate([numpy.random.normal(i, 1, size=(100, 5)) for i in range(2)])
-
-	gmms = []
-	for i in range(3):
-		mu, cov = numpy.ones(5), numpy.eye(5)
-		d = [MultivariateGaussianDistribution(mu*i, cov) for i in range(2)]
-		gmms.append(GeneralMixtureModel(d))
-	
-	gmm, gmm2, gmm3 = gmms
-
-	gmm.fit(X)
-	gmm2.fit(X, max_iterations=1)
-	gmm3.fit(X, max_iterations=1)
-
-	logp1 = gmm.log_probability(X).sum()
-	logp2 = gmm2.log_probability(X).sum()
-	logp3 = gmm3.log_probability(X).sum()
+def test_gmm_multivariate_gaussian_fit_iterations(multivariate_gaussian):
+	gmm = multivariate_gaussian
+	numpy.random.seed(0)
+	X = numpy.concatenate([numpy.random.normal(i, 1, size=(100, 5)) for i in range(2)])
+
+	gmms = []
+	for i in range(3):
+		mu, cov = numpy.ones(5), numpy.eye(5)
+		d = [MultivariateGaussianDistribution(mu*i, cov) for i in range(2)]
+		gmms.append(GeneralMixtureModel(d))
+
+	gmm, gmm2, gmm3 = gmms
+
+	gmm.fit(X)
+	gmm2.fit(X, max_iterations=1)
+	gmm3.fit(X, max_iterations=1)
+
+	logp1 = gmm.log_probability(X).sum()
+	logp2 = gmm2.log_probability(X).sum()
+	logp3 = gmm3.log_probability(X).sum()
 
-	assert_greater(logp1, logp2)
-	assert_equal(logp2, logp3)
+	assert logp1 > logp2
+	assert logp2 == logp3
 
 
 def test_gmm_initialization():
-	assert_raises(ValueError, GeneralMixtureModel, [])
+	with pytest.raises(ValueError):
+		GeneralMixtureModel([])
 
-	assert_raises(TypeError, GeneralMixtureModel, [NormalDistribution(5, 2), MultivariateGaussianDistribution([5, 2], [[1, 0], [0, 1]])])
-	assert_raises(TypeError, GeneralMixtureModel, [NormalDistribution(5, 2), NormalDistribution])
-
-	X = numpy.concatenate((numpy.random.randn(300, 5) + 0.5, numpy.random.randn(200, 5)))
-
-	MGD = MultivariateGaussianDistribution
-
-	gmm1 = GeneralMixtureModel.from_samples(MGD, 2, X, init='first-k')
-	gmm2 = GeneralMixtureModel.from_samples(MGD, 2, X, init='first-k', max_iterations=1)
-	assert_greater(gmm1.log_probability(X).sum(), gmm2.log_probability(X).sum())
-
-	assert_equal(gmm1.d, 5)
-	assert_equal(gmm2.d, 5)
+	with pytest.raises(TypeError):
+		GeneralMixtureModel([NormalDistribution(5, 2), MultivariateGaussianDistribution([5, 2], [[1, 0], [0, 1]])])
+	with pytest.raises(TypeError):
+		GeneralMixtureModel([NormalDistribution(5, 2), NormalDistribution])
+
+	X = numpy.concatenate((numpy.random.randn(300, 5) + 0.5, numpy.random.randn(200, 5)))
+
+	MGD = MultivariateGaussianDistribution
+
+	gmm1 = GeneralMixtureModel.from_samples(MGD, 2, X, init='first-k')
+	gmm2 = GeneralMixtureModel.from_samples(MGD, 2, X, init='first-k', max_iterations=1)
+	assert gmm1.log_probability(X).sum() > gmm2.log_probability(X).sum()
+
+	assert gmm1.d == 5
+	assert gmm2.d == 5
 
 
 def test_gmm_multivariate_discrete_initialization():
 	d1 = IndependentComponentsDistribution([DiscreteDistribution({'A' : 0.5, 'B' : 0.5}), 
-                                            DiscreteDistribution({'0' : 0.2, '1' : 0.2, '2' : 0.2, '3' : 0.4})])
+											DiscreteDistribution({'0' : 0.2, '1' : 0.2, '2' : 0.2, '3' : 0.4})])
 	d2 = IndependentComponentsDistribution([DiscreteDistribution({'A' : 0.1, 'B' : 0.9}), 
-                                            DiscreteDistribution({'0' : 0.1, '1' : 0.6, '2' : 0.1, '3' : 0.2})])
+											DiscreteDistribution({'0' : 0.1, '1' : 0.6, '2' : 0.1, '3' : 0.2})])
 
 	GeneralMixtureModel([d1, d2], weights=np.array([0.4,0.6]))
 
-@with_setup(setup_multivariate_gaussian, teardown)
-def test_gmm_dimension():
+def test_gmm_dimension(multivariate_gaussian):
+	gmm = multivariate_gaussian
 	gmm1 = GeneralMixtureModel([NormalDistribution(0, 1), UniformDistribution(0, 10)])
 
-	assert_equal(gmm.d, 5)
-	assert_equal(gmm1.d, 1)
+	assert gmm.d == 5
+	assert gmm1.d == 1
 
 
-@with_setup(setup_multivariate_gaussian, teardown)
-def test_gmm_json():
+def test_gmm_json(multivariate_gaussian):
+	gmm = multivariate_gaussian
 	univariate = GeneralMixtureModel([NormalDistribution(5, 2), UniformDistribution(0, 10)])
 
 	j_univ = univariate.to_json()
 	j_multi = gmm.to_json()
 
 	new_univ = univariate.from_json(j_univ)
-	assert_true(isinstance(new_univ.distributions[0], NormalDistribution))
-	assert_true(isinstance(new_univ.distributions[1], UniformDistribution))
-	assert_true(isinstance(new_univ, GeneralMixtureModel))
+	assert isinstance(new_univ.distributions[0], NormalDistribution)
+	assert isinstance(new_univ.distributions[1], UniformDistribution)
+	assert isinstance(new_univ, GeneralMixtureModel)
 	assert_array_equal(univariate.weights, new_univ.weights)
 
 	new_multi = gmm.from_json(j_multi)
 	for i in range(5):
-		assert_true(isinstance(new_multi.distributions[i], MultivariateGaussianDistribution))
+		assert isinstance(new_multi.distributions[i], MultivariateGaussianDistribution)
 
-	assert_true(isinstance(new_multi, GeneralMixtureModel))
+	assert isinstance(new_multi, GeneralMixtureModel)
 	assert_array_almost_equal(gmm.weights, new_multi.weights)
 
 
 def test_gmm_univariate_pickling():
 	univariate = GeneralMixtureModel(
 		[NormalDistribution(5, 2), UniformDistribution(0, 10)],
-        weights=np.array([1.0, 2.0]))
+		weights=np.array([1.0, 2.0]))
 
 	j_univ = pickle.dumps(univariate)
 
 	new_univ = pickle.loads(j_univ)
-	assert_true(isinstance(new_univ.distributions[0], NormalDistribution))
-	assert_true(isinstance(new_univ.distributions[1], UniformDistribution))
-	assert_true(isinstance(new_univ, GeneralMixtureModel))
+	assert isinstance(new_univ.distributions[0], NormalDistribution)
+	assert isinstance(new_univ.distributions[1], UniformDistribution)
+	assert isinstance(new_univ, GeneralMixtureModel)
 	assert_array_equal(univariate.weights, new_univ.weights)
 
 
-@with_setup(setup_multivariate_gaussian, teardown)
-def test_gmm_multivariate_gaussian_pickling():
+def test_gmm_multivariate_gaussian_pickling(multivariate_gaussian):
+	gmm = multivariate_gaussian
 	gmm2 = pickle.loads(pickle.dumps(gmm))
 
 	for d in gmm2.distributions:
-		assert_true(isinstance(d, MultivariateGaussianDistribution))
+		assert isinstance(d, MultivariateGaussianDistribution)
 
-	assert_true(isinstance(gmm2, GeneralMixtureModel))
+	assert isinstance(gmm2, GeneralMixtureModel)
 	assert_array_almost_equal(gmm.weights, gmm2.weights)
 
 
-@with_setup(setup_multivariate_mixed, teardown)
-def test_gmm_multivariate_gaussian_pickling():
-	gmm2 = pickle.loads(pickle.dumps(gmm))
-	d1 = gmm2.distributions[0]
-	d2 = gmm2.distributions[1]
-
-	assert_true(isinstance(d1, IndependentComponentsDistribution))
-	assert_true(isinstance(d2, IndependentComponentsDistribution))
-
-	assert_true(isinstance(d1.distributions[0], NormalDistribution))
-	assert_true(isinstance(d1.distributions[1], ExponentialDistribution))
-	assert_true(isinstance(d1.distributions[2], LogNormalDistribution))
-	assert_true(isinstance(d1.distributions[3], NormalDistribution))
-	assert_true(isinstance(d1.distributions[4], PoissonDistribution))
-
-	assert_true(isinstance(d2.distributions[0], NormalDistribution))
-	assert_true(isinstance(d2.distributions[1], ExponentialDistribution))
-	assert_true(isinstance(d2.distributions[2], LogNormalDistribution))
-	assert_true(isinstance(d2.distributions[3], NormalDistribution))
-	assert_true(isinstance(d2.distributions[4], PoissonDistribution))
-
-	assert_true(isinstance(gmm2, GeneralMixtureModel))
-	assert_array_equal(gmm.weights, gmm2.weights)
-
-
 def test_gmm_multivariate_gaussian_ooc():
 	X = numpy.concatenate([numpy.random.randn(1000, 3) + i for i in range(3)])
 
@@ -547,7 +524,7 @@
 
 	assert_almost_equal(gmm.log_probability(X).sum(), gmm2.log_probability(X).sum())
 	assert_almost_equal(gmm.log_probability(X).sum(), gmm3.log_probability(X).sum(), -2)
-	assert_not_equal(gmm.log_probability(X).sum(), gmm4.log_probability(X).sum())
+	assert gmm.log_probability(X).sum() != gmm4.log_probability(X).sum()
 
 
 def test_gmm_multivariate_mixed_ooc():
@@ -565,7 +542,7 @@
 		batch_size=500, batches_per_epoch=2)
 
 	assert_almost_equal(gmm.log_probability(X).sum(), gmm2.log_probability(X).sum())
-	assert_not_equal(gmm.log_probability(X).sum(), gmm4.log_probability(X).sum())
+	assert gmm.log_probability(X).sum() != gmm4.log_probability(X).sum()
 
 
 def test_gmm_multivariate_gaussian_minibatch():
@@ -580,10 +557,10 @@
 	gmm4 = GeneralMixtureModel.from_samples(MultivariateGaussianDistribution,
 		3, X, init='first-k', max_iterations=5, batch_size=3000, batches_per_epoch=1)
 
-	assert_not_equal(gmm.log_probability(X).sum(), gmm2.log_probability(X).sum())
-	assert_not_equal(gmm2.log_probability(X).sum(), gmm3.log_probability(X).sum())
-	assert_raises(AssertionError, assert_array_almost_equal, gmm3.log_probability(X),
-		gmm.log_probability(X))
+	assert gmm.log_probability(X).sum() != gmm2.log_probability(X).sum()
+	assert gmm2.log_probability(X).sum() != gmm3.log_probability(X).sum()
+	with pytest.raises(AssertionError):
+		assert_array_almost_equal(gmm3.log_probability(X), gmm.log_probability(X))
 
 	assert_array_equal(gmm.log_probability(X), gmm4.log_probability(X))
 
@@ -602,10 +579,10 @@
 	gmm4 = GeneralMixtureModel.from_samples(d, 3, X, init='first-k', max_iterations=5,
 		batch_size=3000, batches_per_epoch=1)
 
-	assert_not_equal(gmm.log_probability(X).sum(), gmm2.log_probability(X).sum())
-	assert_not_equal(gmm2.log_probability(X).sum(), gmm3.log_probability(X).sum())
-	assert_raises(AssertionError, assert_array_almost_equal, gmm3.log_probability(X),
-		gmm.log_probability(X))
+	assert gmm.log_probability(X).sum() != gmm2.log_probability(X).sum()
+	assert gmm2.log_probability(X).sum() != gmm3.log_probability(X).sum()
+	with pytest.raises(AssertionError):
+		assert_array_almost_equal(gmm3.log_probability(X), gmm.log_probability(X))
 
 	assert_array_equal(gmm.log_probability(X), gmm4.log_probability(X))
 
@@ -623,13 +600,13 @@
 
 	mu1t = [-0.036813615311095164, 0.05802948506749107, 0.09725454186262805]
 	cov1t = [[ 1.02529437, -0.11391075,  0.03146951],
- 		  	[-0.11391075,  1.03553592, -0.07852064],
- 		 	[ 0.03146951, -0.07852064,  0.83874547]]
+			[-0.11391075,  1.03553592, -0.07852064],
+			[ 0.03146951, -0.07852064,	0.83874547]]
 
 	mu2t = [8.088079704231793, 7.927924504375215, 8.000474719123183]
 	cov2t = [[ 0.95559825, -0.02582016,  0.07491681],
- 			[-0.02582016,  0.99427793,  0.03304442],
- 			[ 0.07491681,  0.03304442,  1.15403456]]
+			[-0.02582016,  0.99427793,	0.03304442],
+			[ 0.07491681,  0.03304442,	1.15403456]]
 
 	for init in 'first-k', 'random', 'kmeans++':
 		model = GeneralMixtureModel.from_samples(MultivariateGaussianDistribution, 2,
@@ -697,17 +674,17 @@
 	mu1t = [-0.003165176330948316, 0.07462401273020161, 0.04001352280548061]
 	cov1t = [[ 0.98556769, -0.10062447,  0.08213565],
 				[-0.10062447,  1.06955989, -0.03085883],
-				[ 0.08213565, -0.03085883,  0.89728992]]
+				[ 0.08213565, -0.03085883,	0.89728992]]
 
 	mu2t = [2.601485766170187, 2.48231424824341, 2.52771758325412]
 	cov2t = [[ 0.94263451, -0.00361101, -0.02668448],
-	 		[-0.00361101,  1.06339061, -0.00408865],
+			[-0.00361101,  1.06339061, -0.00408865],
 			 [-0.02668448, -0.00408865,  1.14789789]]
 
 	mu3t = [5.950490843670593, 5.9572969419328725, 6.025950220056731]
-	cov3t = [[ 1.03991941, -0.0232587,  -0.02457755],
+	cov3t = [[ 1.03991941, -0.0232587,	-0.02457755],
 				[-0.0232587,   1.01047466, -0.04948464],
-				[-0.02457755, -0.04948464,  0.85671553]]
+				[-0.02457755, -0.04948464,	0.85671553]]
 
 	mu1 = model.distributions[0].parameters[0]
 	cov1 = model.distributions[0].parameters[1]
@@ -726,8 +703,8 @@
 	assert_array_almost_equal(cov3, cov3t)
 
 
-@with_setup(setup_multivariate_mixed, teardown)
-def test_gmm_multivariate_mixed_nan_fit():
+def test_gmm_multivariate_mixed_nan_fit(multivariate_mixed):
+	gmm = multivariate_mixed
 
 	numpy.random.seed(1)
 	X = numpy.concatenate([numpy.random.normal(0, 1, size=(300, 5)),
@@ -750,8 +727,8 @@
 		assert_almost_equal(d2.parameters[0], p2[i], 4)
 
 
-@with_setup(setup_multivariate_gaussian, teardown)
-def test_gmm_multivariate_gaussian_nan_log_probability():
+def test_gmm_multivariate_gaussian_nan_log_probability(multivariate_gaussian):
+	gmm = multivariate_gaussian
 	numpy.random.seed(1)
 
 	X = numpy.concatenate([numpy.random.normal(0, 1, size=(5, 5)),
@@ -764,8 +741,8 @@
 
 	logp_t = [ -7.73923053e+00,  -7.81725880e+00,  -4.55182482e+00,  -2.45359578e+01,
 			   -8.01289941e+00,  -1.08630517e+01,  -3.10356303e+00,  -3.06193233e+01,
- 			   -5.46424483e+00,  -1.84952128e+01,  -3.15420910e+01,  -1.01635415e+01,
-  			    1.66533454e-16,  -2.70671185e+00,  -5.69860159e+00]
+			   -5.46424483e+00,  -1.84952128e+01,  -3.15420910e+01,  -1.01635415e+01,
+				1.66533454e-16,  -2.70671185e+00,  -5.69860159e+00]
 
 	logp = gmm.log_probability(X)
 	logp2 = gmm.log_probability(X, n_jobs=2)
@@ -776,25 +753,25 @@
 	assert_array_almost_equal(logp4, logp_t)
 
 
-@with_setup(setup_multivariate_mixed, teardown)
-def test_gmm_multivariate_mixed_nan_log_probability():
-	X = numpy.array([[ 1.014,   nan, 1.076, 1.012,   nan],
-		 [ 0.745,   nan,   nan,   nan, 1.226],
-		 [ 0.012,   nan,   nan, 0.010, 0.006],
-		 [   nan,   nan, 0.979, 1.031,   nan],
-		 [ 0.006, 0.003,   nan, 0.006,   nan],
-		 [   nan,   nan,   nan,   nan, 1.041],
+def test_gmm_multivariate_mixed_nan_log_probability(multivariate_mixed):
+	gmm = multivariate_mixed
+	X = numpy.array([[ 1.014,	nan, 1.076, 1.012,	 nan],
+		 [ 0.745,	nan,   nan,   nan, 1.226],
+		 [ 0.012,	nan,   nan, 0.010, 0.006],
+		 [	 nan,	nan, 0.979, 1.031,	 nan],
+		 [ 0.006, 0.003,   nan, 0.006,	 nan],
+		 [	 nan,	nan,   nan,   nan, 1.041],
 		 [ 1.176, 1.040, 1.098, 1.224, 1.186],
-		 [   nan, 0.004,   nan,   nan, 0.005],
-		 [   nan,   nan,   nan, 0.025,   nan],
-		 [ 0.0116,  nan, 0.022, 0.006,   nan]])
+		 [	 nan, 0.004,   nan,   nan, 0.005],
+		 [	 nan,	nan,   nan, 0.025,	 nan],
+		 [ 0.0116,	nan, 0.022, 0.006,	 nan]])
 
 	logp = gmm.log_probability(X)
 	logp2 = gmm.log_probability(X, n_jobs=2)
 	logp4 = gmm.log_probability(X, n_jobs=4)
 
 	logp_t = [ -3.447266,  -4.038374,  -6.900469,  -2.282948,  -1.216055,
-        -3.080241,  -9.789394,  -2.4831  ,  -0.799054, -13.052867]
+		-3.080241,	-9.789394,	-2.4831  ,	-0.799054, -13.052867]
 
 	assert_array_almost_equal(logp, logp_t)
 	assert_array_almost_equal(logp2, logp_t)
@@ -816,15 +793,15 @@
 			X_nan, init=init, n_init=1)
 
 		for d in model.distributions:
-			assert_equal(numpy.isnan(d.parameters[0]).sum(), 0)
-			assert_equal(numpy.isnan(d.parameters[1]).sum(), 0)
+			assert numpy.isnan(d.parameters[0]).sum() == 0
+			assert numpy.isnan(d.parameters[1]).sum() == 0
 
 		y_hat = model.predict(X)
-		assert_equal(y_hat.sum(), 300)
+		assert y_hat.sum() == 300
 
 
-@with_setup(setup_multivariate_gaussian, teardown)
-def test_gmm_multivariate_gaussian_nan_predict():
+def test_gmm_multivariate_gaussian_nan_predict(multivariate_gaussian):
+	gmm = multivariate_gaussian
 	numpy.random.seed(0)
 	X = numpy.concatenate([numpy.random.normal(0, 1, size=(5, 5)),
 						   numpy.random.normal(2, 1, size=(5, 5))])
@@ -838,31 +815,31 @@
 	assert_array_equal(y, y_hat)
 
 
-@with_setup(setup_multivariate_mixed, teardown)
-def test_gmm_multivariate_mixed_nan_predict():
-	X = [[ 1.014,   nan, 1.076, 1.012,   nan],
-		 [ 0.745,   nan,   nan,   nan, 1.226],
-		 [ 0.012,   nan,   nan, 0.010, 0.006],
-		 [   nan,   nan, 0.979, 1.031,   nan],
-		 [ 0.006, 0.003,   nan, 0.006,   nan],
-		 [   nan,   nan,   nan,   nan, 1.041],
+def test_gmm_multivariate_mixed_nan_predict(multivariate_mixed):
+	gmm = multivariate_mixed
+	X = [[ 1.014,	nan, 1.076, 1.012,	 nan],
+		 [ 0.745,	nan,   nan,   nan, 1.226],
+		 [ 0.012,	nan,   nan, 0.010, 0.006],
+		 [	 nan,	nan, 0.979, 1.031,	 nan],
+		 [ 0.006, 0.003,   nan, 0.006,	 nan],
+		 [	 nan,	nan,   nan,   nan, 1.041],
 		 [ 1.176, 1.040, 1.098, 1.224, 1.186],
-		 [   nan, 0.004,   nan,   nan, 0.005],
-		 [   nan,   nan,   nan, 0.025,   nan],
-		 [ 0.0116,  nan, 0.022, 0.006,   nan]]
+		 [	 nan, 0.004,   nan,   nan, 0.005],
+		 [	 nan,	nan,   nan, 0.025,	 nan],
+		 [ 0.0116,	nan, 0.022, 0.006,	 nan]]
 
 	y_hat = gmm.predict(X)
 	y = [0, 0, 0, 0, 0, 0, 0, 0, 1, 1]
 	assert_array_equal(y, y_hat)
 
 
-@with_setup(setup_multivariate_discrete)
-def test_gmm_multivariate_discrete_predict_proba():
+def test_gmm_multivariate_discrete_predict_proba(multivariate_discrete):
+	gmm = multivariate_discrete
 	probs = gmm.predict_proba(numpy.array([['A', '0'], ['B', '1']]))
 	assert_almost_equal(probs, [[0.86956522, 0.13043478], [0.10989011, 0.89010989]])
 
-@with_setup(setup_multivariate_gaussian, teardown)
-def test_gmm_multivariate_gaussian_nan_predict_proba():
+def test_gmm_multivariate_gaussian_nan_predict_proba(multivariate_gaussian):
+	gmm = multivariate_gaussian
 	numpy.random.seed(0)
 	X = numpy.concatenate([numpy.random.normal(0, 1, size=(5, 5)),
 						   numpy.random.normal(2, 1, size=(5, 5))])
@@ -872,44 +849,44 @@
 	X[i, j] = numpy.nan
 
 	y_hat = gmm.predict_proba(X)
-	y = [[  9.90174494e-01, 9.82550625e-03, 2.20378830e-10, 1.11726590e-23, 1.28030945e-42],
-		 [  2.53691739e-01, 7.46308013e-01, 2.47068952e-07, 9.20463413e-21, 3.85907464e-41],
-		 [  9.97082301e-01, 2.91769906e-03, 1.18573581e-16, 6.69226809e-41, 5.24561818e-76],
-		 [  9.94236788e-01, 5.76321240e-03, 7.55111628e-11, 2.23629671e-24, 1.49699183e-43],
-		 [  9.56664759e-01, 4.33351517e-02, 8.91201785e-08, 8.32083585e-18, 3.52706158e-32],
-		 [  9.98269488e-01, 1.73051193e-03, 3.37590087e-13, 7.41127732e-30, 1.83098477e-53],
-		 [  1.75007605e-01, 8.24992395e-01, 3.63922167e-13, 1.50221681e-38, 5.80259513e-77],
-		 [  8.76384248e-01, 1.23615751e-01, 7.21849843e-10, 1.74507352e-25, 1.74652336e-48],
-		 [  2.12508922e-03, 9.45913785e-01, 5.19607732e-02, 3.52248634e-07, 2.94694950e-16],
-		 [  8.63688760e-03, 9.91106040e-01, 2.57071963e-04, 1.50716583e-13, 1.99728068e-28]]
+	y = [[	9.90174494e-01, 9.82550625e-03, 2.20378830e-10, 1.11726590e-23, 1.28030945e-42],
+		 [	2.53691739e-01, 7.46308013e-01, 2.47068952e-07, 9.20463413e-21, 3.85907464e-41],
+		 [	9.97082301e-01, 2.91769906e-03, 1.18573581e-16, 6.69226809e-41, 5.24561818e-76],
+		 [	9.94236788e-01, 5.76321240e-03, 7.55111628e-11, 2.23629671e-24, 1.49699183e-43],
+		 [	9.56664759e-01, 4.33351517e-02, 8.91201785e-08, 8.32083585e-18, 3.52706158e-32],
+		 [	9.98269488e-01, 1.73051193e-03, 3.37590087e-13, 7.41127732e-30, 1.83098477e-53],
+		 [	1.75007605e-01, 8.24992395e-01, 3.63922167e-13, 1.50221681e-38, 5.80259513e-77],
+		 [	8.76384248e-01, 1.23615751e-01, 7.21849843e-10, 1.74507352e-25, 1.74652336e-48],
+		 [	2.12508922e-03, 9.45913785e-01, 5.19607732e-02, 3.52248634e-07, 2.94694950e-16],
+		 [	8.63688760e-03, 9.91106040e-01, 2.57071963e-04, 1.50716583e-13, 1.99728068e-28]]
 
 	assert_array_almost_equal(y, y_hat, 2)
 
 
-@with_setup(setup_multivariate_mixed, teardown)
-def test_gmm_multivariate_mixed_nan_predict_proba():
-	X = [[ 1.014,   nan, 1.076, 1.012,   nan],
-		 [ 0.745,   nan,   nan,   nan, 1.226],
-		 [ 0.012,   nan,   nan, 0.010, 0.006],
-		 [   nan,   nan, 0.979, 1.031,   nan],
-		 [ 0.006, 0.003,   nan, 0.006,   nan],
-		 [   nan,   nan,   nan,   nan, 1.041],
+def test_gmm_multivariate_mixed_nan_predict_proba(multivariate_mixed):
+	gmm = multivariate_mixed
+	X = [[ 1.014,	nan, 1.076, 1.012,	 nan],
+		 [ 0.745,	nan,   nan,   nan, 1.226],
+		 [ 0.012,	nan,   nan, 0.010, 0.006],
+		 [	 nan,	nan, 0.979, 1.031,	 nan],
+		 [ 0.006, 0.003,   nan, 0.006,	 nan],
+		 [	 nan,	nan,   nan,   nan, 1.041],
 		 [ 1.176, 1.040, 1.098, 1.224, 1.186],
-		 [   nan, 0.004,   nan,   nan, 0.005],
-		 [   nan,   nan,   nan, 0.025,   nan],
-		 [ 0.0116,  nan, 0.022, 0.006,   nan]]
+		 [	 nan, 0.004,   nan,   nan, 0.005],
+		 [	 nan,	nan,   nan, 0.025,	 nan],
+		 [ 0.0116,	nan, 0.022, 0.006,	 nan]]
 
 	y_hat = gmm.predict_proba(X)
-	y = [[  9.54354263e-01,   4.56457374e-02],
-		 [  9.82240224e-01,   1.77597761e-02],
-		 [  9.97442611e-01,   2.55738927e-03],
-		 [  7.48577347e-01,   2.51422653e-01],
-		 [  8.93432925e-01,   1.06567075e-01],
-		 [  8.28908436e-01,   1.71091564e-01],
-		 [  1.00000000e+00,   2.78899961e-15],
-		 [  5.42909820e-01,   4.57090180e-01],
-		 [  4.96708777e-01,   5.03291223e-01],
-		 [  1.31011533e-01,   8.68988467e-01]]
+	y = [[	9.54354263e-01,   4.56457374e-02],
+		 [	9.82240224e-01,   1.77597761e-02],
+		 [	9.97442611e-01,   2.55738927e-03],
+		 [	7.48577347e-01,   2.51422653e-01],
+		 [	8.93432925e-01,   1.06567075e-01],
+		 [	8.28908436e-01,   1.71091564e-01],
+		 [	1.00000000e+00,   2.78899961e-15],
+		 [	5.42909820e-01,   4.57090180e-01],
+		 [	4.96708777e-01,   5.03291223e-01],
+		 [	1.31011533e-01,   8.68988467e-01]]
 
 	assert_array_almost_equal(y, y_hat)
 
@@ -1078,8 +1055,10 @@
 	cov4 = model4.distributions[0].parameters[1]
 
 	assert_array_almost_equal(cov1, cov2)
-	assert_raises(AssertionError, assert_array_almost_equal, cov1, cov3)
-	assert_raises(AssertionError, assert_array_almost_equal, cov1, cov4)
+	with pytest.raises(AssertionError):
+		assert_array_almost_equal(cov1, cov3)
+	with pytest.raises(AssertionError):
+		assert_array_almost_equal(cov1, cov4)
 
 
 def test_gmm_multivariate_mixed_minibatch_nan_from_samples():
@@ -1107,8 +1086,10 @@
 	p4 = model4.distributions[0].distributions[0].parameters[0]
 
 	assert_almost_equal(p1, p2)
-	assert_raises(AssertionError, assert_almost_equal, p1, p3)
-	assert_raises(AssertionError, assert_almost_equal, p1, p4)
+	with pytest.raises(AssertionError):
+		assert_almost_equal(p1, p3)
+	with pytest.raises(AssertionError):
+		assert_almost_equal(p1, p4)
 
 	p1 = model1.distributions[0].distributions[1].parameters[0]
 	p2 = model2.distributions[0].distributions[1].parameters[0]
@@ -1116,8 +1097,10 @@
 	p4 = model4.distributions[0].distributions[1].parameters[0]
 
 	assert_almost_equal(p1, p2)
-	assert_raises(AssertionError, assert_almost_equal, p1, p3)
-	assert_raises(AssertionError, assert_almost_equal, p1, p4)
+	with pytest.raises(AssertionError):
+		assert_almost_equal(p1, p3)
+	with pytest.raises(AssertionError):
+		assert_almost_equal(p1, p4)
 
 	p1 = model1.distributions[0].distributions[2].parameters[0]
 	p2 = model2.distributions[0].distributions[2].parameters[0]
@@ -1125,8 +1108,10 @@
 	p4 = model4.distributions[0].distributions[2].parameters[0]
 
 	assert_almost_equal(p1, p2)
-	assert_raises(AssertionError, assert_almost_equal, p1, p3)
-	assert_raises(AssertionError, assert_almost_equal, p1, p4)
+	with pytest.raises(AssertionError):
+		assert_almost_equal(p1, p3)
+	with pytest.raises(AssertionError):
+		assert_almost_equal(p1, p4)
 
 def test_gmm_multivariate_gaussian_minibatch_nan_fit():
 	X = numpy.concatenate([numpy.random.normal(i*3, 0.5, size=(100, 3)) for i in range(2)])
@@ -1156,7 +1141,8 @@
 	cov3 = model3.distributions[0].parameters[1]
 
 	assert_array_almost_equal(cov1, cov2)
-	assert_raises(AssertionError, assert_array_equal, cov1, cov3)
+	with pytest.raises(AssertionError):
+		assert_array_equal(cov1, cov3)
 
 
 def test_gmm_multivariate_mixed_minibatch_nan_fit():
@@ -1190,43 +1176,48 @@
 	p3 = model3.distributions[0].distributions[0].parameters[0]
 
 	assert_almost_equal(p1, p2)
-	assert_raises(AssertionError, assert_almost_equal, p1, p3)
+	with pytest.raises(AssertionError):
+		assert_almost_equal(p1, p3)
 
 	p1 = model1.distributions[0].distributions[1].parameters[0]
 	p2 = model2.distributions[0].distributions[1].parameters[0]
 	p3 = model3.distributions[0].distributions[1].parameters[0]
 
 	assert_almost_equal(p1, p2)
-	assert_raises(AssertionError, assert_almost_equal, p1, p3)
+	with pytest.raises(AssertionError):
+		assert_almost_equal(p1, p3)
 
 	p1 = model1.distributions[0].distributions[2].parameters[0]
 	p2 = model2.distributions[0].distributions[2].parameters[0]
 	p3 = model3.distributions[0].distributions[2].parameters[0]
 
 	assert_almost_equal(p1, p2)
-	assert_raises(AssertionError, assert_almost_equal, p1, p3)
+	with pytest.raises(AssertionError):
+		assert_almost_equal(p1, p3)
 
-@with_setup(setup_multivariate_mixed, teardown)
-def test_gmm_multivariate_mixed_random_sample():
-	x = numpy.array([[ 4.61023781e+00,  2.68284944e-02,  5.15017758e+01,  1.62716647e+00, 5.00000000e+00],
-					 [ 1.97078941e+00,  1.02011059e-02,  1.53902626e+00, -2.20447410e-01, 4.00000000e+00],
-					 [-2.37630710e-01,  2.01296137e-02,  8.15899779e-02, -1.76634150e+00, 1.00000000e+01]])
+def test_gmm_multivariate_mixed_random_sample(multivariate_mixed):
+	gmm = multivariate_mixed
+	x = numpy.array([[ 4.61023781e+00,	2.68284944e-02,  5.15017758e+01,  1.62716647e+00, 5.00000000e+00],
+					 [ 1.97078941e+00,	1.02011059e-02,  1.53902626e+00, -2.20447410e-01, 4.00000000e+00],
+					 [-2.37630710e-01,	2.01296137e-02,  8.15899779e-02, -1.76634150e+00, 1.00000000e+01]])
 
 	assert_array_almost_equal(gmm.sample(3, random_state=5), x)
-	assert_raises(AssertionError, assert_array_almost_equal, gmm.sample(3), x)
+	with pytest.raises(AssertionError):
+		assert_array_almost_equal(gmm.sample(3), x)
 
-@with_setup(setup_multivariate_gaussian, teardown)
-def test_gmm_multivariate_mixed_random_sample():
+def test_gmm_multivariate_gaussian_random_sample(multivariate_gaussian):
+	gmm = multivariate_gaussian
 	x = numpy.array([[-0.937128,  3.919795,  7.066424, 11.901844, 16.691532],
-			         [ 1.875753,  0.915462,  2.591164,  3.173006,  4.656439],
-			         [ 0.307347,  3.411364,  7.450107, 11.520762, 16.511734]])
+					 [ 1.875753,  0.915462,  2.591164,	3.173006,  4.656439],
+					 [ 0.307347,  3.411364,  7.450107, 11.520762, 16.511734]])
 
 	assert_array_almost_equal(gmm.sample(3, random_state=5), x)
-	assert_raises(AssertionError, assert_array_almost_equal, gmm.sample(3), x)
+	with pytest.raises(AssertionError):
+		assert_array_almost_equal(gmm.sample(3), x)
 
 
-@with_setup(setup_multivariate_mixed_discrete_other)
-def test_gmm_multivariate_mixed_discrete_other_fit():
+def test_gmm_multivariate_mixed_discrete_other_fit(multivariate_mixed_discrete_other):
+	gmm = multivariate_mixed_discrete_other
 	x = numpy.array(
 		[
 			[0, 'A', '0'],
--- python-pomegranate.orig/tests/test_hmm.py
+++ python-pomegranate/tests/test_hmm.py
@@ -1,23 +1,16 @@
 from __future__ import (division)
 
 from pomegranate import *
-from pomegranate.parallel import log_probability
 from pomegranate.io import SequenceGenerator
 
-from .tools import with_setup
-from .tools import assert_almost_equal
-from .tools import assert_equal
-from .tools import assert_not_equal
-from .tools import assert_less_equal
-from .tools import assert_raises
-from .tools import assert_greater
+from .assert_tools import assert_almost_equal
 from numpy.testing import assert_array_almost_equal
 from numpy.testing import assert_array_equal
 
 import pickle
 import random
 import numpy
-import time
+import pytest
 
 numpy.random.seed(0)
 random.seed(0)
@@ -92,61 +85,60 @@
     return model
 
 
-def setup():
-    global model
-
+@pytest.fixture
+def model():
     i_d = DiscreteDistribution({ 'A': 0.25, 'C': 0.25, 'G': 0.25, 'T': 0.25 })
     d1 = DiscreteDistribution({ "A": 0.95, 'C': 0.01, 'G': 0.01, 'T': 0.02 })
     d2 = DiscreteDistribution({ "A": 0.003, 'C': 0.99, 'G': 0.003, 'T': 0.004 })
     d3 = DiscreteDistribution({ "A": 0.01, 'C': 0.01, 'G': 0.01, 'T': 0.97 })
 
-    model = sparse_model(d1, d2, d3, i_d)
-
+    return sparse_model(d1, d2, d3, i_d)
 
-def setup_multivariate_discrete_sparse():
-    global model
 
+@pytest.fixture
+def multivariate_discrete_sparse():
     i1 = DiscreteDistribution({'A': 0.25, 'C': 0.25, 'G': 0.25, 'T': 0.25})
     i2 = DiscreteDistribution({'A': 0.25, 'C': 0.25, 'G': 0.25, 'T': 0.25})
     i_d = IndependentComponentsDistribution([i1, i2])
-
+
     d11 = DiscreteDistribution({ "A": 0.95, 'C': 0.01, 'G': 0.01, 'T': 0.02 })
     d12 = DiscreteDistribution({ "A": 0.92, 'C': 0.02, 'G': 0.02, 'T': 0.03 })
-
+
     d21 = DiscreteDistribution({ "A": 0.005, 'C': 0.96, 'G': 0.005, 'T': 0.003 })
     d22 = DiscreteDistribution({ "A": 0.003, 'C': 0.99, 'G': 0.003, 'T': 0.004 })
-
+
     d31 = DiscreteDistribution({ "A": 0.01, 'C': 0.01, 'G': 0.01, 'T': 0.97 })
     d32 = DiscreteDistribution({ "A": 0.05, 'C': 0.03, 'G': 0.02, 'T': 0.90 })
-
+
     d1 = IndependentComponentsDistribution([d11, d12])
     d2 = IndependentComponentsDistribution([d21, d22])
     d3 = IndependentComponentsDistribution([d31, d32])
-
+
     model = sparse_model(d1, d2, d3, i_d)
+    return model
 
 
-def setup_multivariate_gaussian_sparse():
-    global model
-
+@pytest.fixture
+def multivariate_gaussian_sparse():
     i1 = UniformDistribution(-20, 20)
     i2 = UniformDistribution(-20, 20)
     i_d = IndependentComponentsDistribution([i1, i2])
-
+
     d11 = NormalDistribution(5, 1)
     d12 = NormalDistribution(7, 1)
-
+
     d21 = NormalDistribution(13, 1)
     d22 = NormalDistribution(17, 1)
-
+
     d31 = NormalDistribution(-2, 1)
     d32 = NormalDistribution(-5, 1)
-
+
     d1 = IndependentComponentsDistribution([d11, d12])
     d2 = IndependentComponentsDistribution([d21, d22])
     d3 = IndependentComponentsDistribution([d31, d32])
-
+
     model = sparse_model(d1, d2, d3, i_d)
+    return model
 
 
 def dense_model(d1, d2, d3, d4):
@@ -185,42 +177,41 @@
     return model
 
 
-def setup_univariate_discrete_dense():
-    global model
-
+@pytest.fixture
+def univariate_discrete_dense():
     d1 = DiscreteDistribution({'A': 0.90, 'B': 0.02, 'C': 0.03, 'D': 0.05})
     d2 = DiscreteDistribution({'A': 0.02, 'B': 0.90, 'C': 0.03, 'D': 0.05})
     d3 = DiscreteDistribution({'A': 0.03, 'B': 0.02, 'C': 0.90, 'D': 0.05})
     d4 = DiscreteDistribution({'A': 0.05, 'B': 0.02, 'C': 0.03, 'D': 0.90})
 
     model = dense_model(d1, d2, d3, d4)
+    return model
 
 
-def setup_univariate_gaussian_dense():
-    global model
-
+@pytest.fixture
+def univariate_gaussian_dense():
     d1 = NormalDistribution(5, 1)
     d2 = NormalDistribution(1, 1)
     d3 = NormalDistribution(13, 2)
     d4 = NormalDistribution(16, 0.5)
 
     model = dense_model(d1, d2, d3, d4)
+    return model
 
 
-def setup_univariate_poisson_dense():
-    global model
-
+@pytest.fixture
+def univariate_poisson_dense():
     d1 = PoissonDistribution(12.1)
     d2 = PoissonDistribution(8.7)
     d3 = PoissonDistribution(1)
     d4 = PoissonDistribution(5)
 
     model = dense_model(d1, d2, d3, d4)
+    return model
 
 
-def setup_multivariate_mixed_dense():
-    global model
-
+@pytest.fixture
+def multivariate_mixed_dense():
     d11 = NormalDistribution(1, 1)
     d12 = ExponentialDistribution(5)
     d13 = LogNormalDistribution(0.5, 0.78)
@@ -250,11 +241,11 @@
     d4 = IndependentComponentsDistribution([d41, d42, d43, d44, d45])
 
     model = dense_model(d1, d2, d3, d4)
+    return model
 
 
-def setup_multivariate_gaussian_dense():
-    global model
-
+@pytest.fixture
+def multivariate_gaussian_dense():
     random_state = numpy.random.RandomState(0)
     mu = random_state.normal(0, 1, size=(4, 5))
     d1 = MultivariateGaussianDistribution(mu[0], numpy.eye(5))
@@ -263,11 +254,11 @@
     d4 = MultivariateGaussianDistribution(mu[3], numpy.eye(5))
 
     model = dense_model(d1, d2, d3, d4)
+    return model
 
 
-def setup_general_mixture_gaussian():
-    global model
-
+@pytest.fixture
+def general_mixture_gaussian():
     # should be able to pass list of weights
     gmm1 = GeneralMixtureModel([NormalDistribution(5, 2), NormalDistribution(1, 2)], weights=[0.33, 0.67])
     gmm2 = GeneralMixtureModel([NormalDistribution(3, 2), NormalDistribution(-1, 2)], weights=numpy.array([0.67, 0.33]))
@@ -282,19 +273,11 @@
     model.add_transition(s2, s2, 0.8)
     model.add_transition(s2, s1, 0.2)
     model.bake()
+    return model
 
 
-def teardown():
-    '''
-    Remove the model at the end of the unit testing. Since it is stored in a
-    global variance, simply delete it.
-    '''
-
-    pass
-
-
-@with_setup(setup_univariate_discrete_dense)
-def test_hmm_univariate_discrete_dense_forward():
+def test_hmm_univariate_discrete_dense_forward(univariate_discrete_dense):
+    model = univariate_discrete_dense
     f = model.forward(['A', 'B', 'D', 'D', 'C'])
     logp = numpy.array([[-inf, -inf, -inf, -inf, 0., -inf],
                 [-2.40794561, -5.11599581, -5.11599581, -3.91202301, -inf, -4.40631933],
@@ -306,8 +289,8 @@
     assert_array_almost_equal(f, logp)
 
 
-@with_setup(setup_univariate_gaussian_dense)
-def test_hmm_univariate_gaussian_dense_forward():
+def test_hmm_univariate_gaussian_dense_forward(univariate_gaussian_dense):
+    model = univariate_gaussian_dense
     f = model.forward([3, 5, 8, 19, 13])
     logp = numpy.array([[-inf, -inf, -inf, -inf, 0.0, -inf],
         [-5.221523626198319, -4.122911337530209, -15.72152362619832, -339.14208208451845, -inf, -6.137807473983832],
@@ -319,8 +302,8 @@
     assert_array_almost_equal(f, logp)
 
 
-@with_setup(setup_univariate_poisson_dense)
-def test_hmm_univariate_poisson_dense_forward():
+def test_hmm_univariate_poisson_dense_forward(univariate_poisson_dense):
+    model = univariate_poisson_dense
     f = model.forward([5, 8, 2, 4, 7, 8, 2])
     logp = numpy.array([[-inf, -inf, -inf, -inf, 0.0, -inf],
         [-6.724049572762615, -3.874849418805291, -7.396929655216146, -2.6565929124856993, -inf, -4.680333421931903],
@@ -334,8 +317,8 @@
     assert_array_almost_equal(f, logp)
 
 
-@with_setup(setup_multivariate_mixed_dense)
-def test_hmm_multivariate_mixed_dense_forward():
+def test_hmm_multivariate_mixed_dense_forward(multivariate_mixed_dense):
+    model = multivariate_mixed_dense
     f = model.forward([[0, 1, 5, 2, 3], [2, 4, 1, 5, 6], [4, 6, 2, 0, 1]])
     logp = numpy.array([[ -inf,  -inf,  -inf,  -inf,  0.,  -inf],
          [ -14.73222089,  -46.16604623,  -29.00420739,  -62.64844211, -inf, -17.03480535],
@@ -345,8 +328,8 @@
     assert_array_almost_equal(f, logp)
 
 
-@with_setup(setup_multivariate_gaussian_dense)
-def test_hmm_multivariate_gaussian_dense_forward():
+def test_hmm_multivariate_gaussian_dense_forward(multivariate_gaussian_dense):
+    model = multivariate_gaussian_dense
     f = model.forward([[0, 1, 5, 2, 3], [2, 4, 1, 5, 6], [-4, 6, -2, 0, 1]])
     logp = numpy.array([[-inf, -inf, -inf, -inf, 0.0, -inf],
         [-17.388625105797296, -25.109952452723487, -20.33305760532214, -28.085443290422877, -inf, -19.639474084287432],
@@ -356,8 +339,8 @@
     assert_array_almost_equal(f, logp)
 
 
-@with_setup(setup_univariate_discrete_dense)
-def test_hmm_univariate_discrete_dense_nan_forward():
+def test_hmm_univariate_discrete_dense_nan_forward(univariate_discrete_dense):
+    model = univariate_discrete_dense
     f = model.forward(['A', nan, 'D', nan, 'C'])
     logp = numpy.array([[       -inf,        -inf,        -inf,        -inf,  0.,                -inf],
          [-2.40794561, -5.11599581, -5.11599581, -3.91202301,        -inf, -4.40631933],
@@ -369,8 +352,8 @@
     assert_array_almost_equal(f, logp)
 
 
-@with_setup(setup_univariate_gaussian_dense)
-def test_hmm_univariate_gaussian_dense_nan_forward():
+def test_hmm_univariate_gaussian_dense_nan_forward(univariate_gaussian_dense):
+    model = univariate_gaussian_dense
     f = model.forward([3, 5, 8, nan, 13])
     logp = numpy.array([[         -inf,          -inf, -inf, -inf, 0., -inf],
          [  -5.22152363,   -4.12291134,  -15.72152363, -339.14208208, -inf, -6.13780747],
@@ -382,8 +365,8 @@
     assert_array_almost_equal(f, logp)
 
 
-@with_setup(setup_univariate_poisson_dense)
-def test_hmm_univariate_poisson_dense_nan_forward():
+def test_hmm_univariate_poisson_dense_nan_forward(univariate_poisson_dense):
+    model = univariate_poisson_dense
     f = model.forward([5, 8, 2, nan, 7, nan, 2])
     logp = numpy.array([[        -inf,         -inf, -inf, -inf, 0., -inf],
          [ -6.72404957,  -3.87484942,  -7.39692966,  -2.65659291, -inf, -4.68033342],
@@ -397,8 +380,8 @@
     assert_array_almost_equal(f, logp)
 
 
-@with_setup(setup_multivariate_mixed_dense)
-def test_hmm_multivariate_mixed_dense_nan_forward():
+def test_hmm_multivariate_mixed_dense_nan_forward(multivariate_mixed_dense):
+    model = multivariate_mixed_dense
     f = model.forward([[0, nan, 5, nan, 3], [nan, 4, 1, 5, 6], [4, nan, 2, nan, 1]])
     logp = numpy.array([[-inf, -inf, -inf, -inf, 0.0, -inf],
         [-8.64586382, -11.86321233, -20.72307256, -49.92199169, -inf, -10.90916394],
@@ -408,8 +391,8 @@
     assert_array_almost_equal(f, logp)
 
 
-@with_setup(setup_multivariate_gaussian_dense)
-def test_hmm_multivariate_gaussian_dense_nan_forward():
+def test_hmm_multivariate_gaussian_dense_nan_forward(multivariate_gaussian_dense):
+    model = multivariate_gaussian_dense
     f = model.forward([[0, nan, 5, nan, 3], [nan, 4, 1, 5, 6], [-4, nan, -2, nan, 1]])
     logp = numpy.array([[        -inf,         -inf,        -inf, -inf,   0.,  -inf],
          [-15.34182759, -21.05906503, -16.62794596, -24.70263887, -inf, -17.39777437],
@@ -419,8 +402,8 @@
     assert_array_almost_equal(f, logp)
 
 
-@with_setup(setup_univariate_discrete_dense)
-def test_hmm_univariate_discrete_dense_backward():
+def test_hmm_univariate_discrete_dense_backward(univariate_discrete_dense):
+    model = univariate_discrete_dense
     f = model.backward(['A', 'B', 'D', 'D', 'C'])
     logp = numpy.array([[-9.86805902419294, -10.666561769922483, -11.09973677168472, -10.617074536069564, -11.092510372852566, -inf],
         [-9.120551817416588, -9.07513780778706, -9.061129343592423, -8.517934491110973, -8.143527673240188, -inf],
@@ -432,8 +415,8 @@
     assert_array_almost_equal(f, logp)
 
 
-@with_setup(setup_univariate_gaussian_dense)
-def test_hmm_univariate_gaussian_dense_backward():
+def test_hmm_univariate_gaussian_dense_backward(univariate_gaussian_dense):
+    model = univariate_gaussian_dense
     f = model.backward([3, 5, 8, 19, 13])
     logp = numpy.array([[-24.010022764471987, -24.820878919065986, -25.359784144328874, -24.666641886986977, -24.907606167690343, -inf],
         [-20.47495458748052, -21.390489786220005, -22.08295469697486, -21.390527588974052, -22.081667004696868, -inf],
@@ -445,8 +428,8 @@
     assert_array_almost_equal(f, logp)
 
 
-@with_setup(setup_univariate_poisson_dense)
-def test_hmm_univariate_poisson_dense_backward():
+def test_hmm_univariate_poisson_dense_backward(univariate_poisson_dense):
+    model = univariate_poisson_dense
     f = model.backward([5, 8, 2, 4, 7, 8, 2])
     logp = numpy.array([[-21.691907586187032, -21.73799328948721, -21.138366991878907, -21.083291604178275, -21.044274521213687, -inf],
         [-18.8959690490499, -19.147279755417475, -18.96895836891973, -18.554384880883024, -18.344028493972242, -inf],
@@ -460,8 +443,8 @@
     assert_array_almost_equal(f, logp)
 
 
-@with_setup(setup_multivariate_mixed_dense)
-def test_hmm_multivariate_mixed_dense_backward():
+def test_hmm_multivariate_mixed_dense_backward(multivariate_mixed_dense):
+    model = multivariate_mixed_dense
     f = model.backward([[0, 1, 5, 2, 3], [2, 4, 1, 5, 6], [4, 6, 2, 0, 1]])
     logp = numpy.array([[-95.62390845, -96.54019907, -97.23334619, -96.54019916, -97.23334625, -inf],
         [-82.50112549, -83.41741621, -84.11056338, -83.41741622, -84.11056339, -inf],
@@ -471,8 +454,8 @@
 
     assert_array_almost_equal(f, logp)
 
-@with_setup(setup_multivariate_gaussian_dense)
-def test_hmm_multivariate_gaussian_dense_backward():
+def test_hmm_multivariate_gaussian_dense_backward(multivariate_gaussian_dense):
+    model = multivariate_gaussian_dense
     f = model.backward([[0, 1, 5, 2, 3], [2, 4, 1, 5, 6], [-4, 6, -2, 0, 1]])
     logp = numpy.array([[-68.2539137567533, -69.16076639610863, -69.84868309756291, -69.16857768159394, -69.85376066028503, -inf],
         [-52.47579136451719, -53.392079325925124, -54.08522496164164, -53.392081629466915, -54.08522649261687, -inf],
@@ -481,8 +464,8 @@
     assert_array_almost_equal(f, logp)
 
 
-@with_setup(setup_univariate_discrete_dense)
-def test_hmm_univariate_discrete_dense_nan_backward():
+def test_hmm_univariate_discrete_dense_nan_backward(univariate_discrete_dense):
+    model = univariate_discrete_dense
     f = model.backward(['A', nan, 'D', nan, 'C'])
     logp = numpy.array([[-6.2351892, -7.03937052, -7.53995041, -7.0284468, -7.53208902, -inf],
         [-5.47529921, -5.28136881, -5.22459211, -5.34120086, -5.21037792, -inf],
@@ -494,8 +477,8 @@
     assert_array_almost_equal(f, logp)
 
 
-@with_setup(setup_univariate_gaussian_dense)
-def test_hmm_univariate_gaussian_dense_nan_backward():
+def test_hmm_univariate_gaussian_dense_nan_backward(univariate_gaussian_dense):
+    model = univariate_gaussian_dense
     f = model.backward([3, 5, 8, nan, 13])
     logp = numpy.array([[-16.72877261, -17.53965174, -18.0785865, -17.38544424, -17.62647294, -inf],
         [-13.19368605, -14.10946925, -14.80215433, -14.10948639, -14.80126502, -inf],
@@ -507,8 +490,8 @@
     assert_array_almost_equal(f, logp)
 
 
-@with_setup(setup_univariate_poisson_dense)
-def test_hmm_univariate_poisson_dense_nan_backward():
+def test_hmm_univariate_poisson_dense_nan_backward(univariate_poisson_dense):
+    model = univariate_poisson_dense
     f = model.backward([5, 8, 2, nan, 7, nan, 2])
     logp = numpy.array([[-16.63348003, -16.67956771, -16.0798998, -16.02482554, -15.98581404, -inf],
         [-13.83793763, -14.08893409, -13.91233676, -13.49588981, -13.28439885, -inf],
@@ -522,8 +505,8 @@
     assert_array_almost_equal(f, logp)
 
 
-@with_setup(setup_multivariate_mixed_dense)
-def test_hmm_multivariate_mixed_dense_nan_backward():
+def test_hmm_multivariate_mixed_dense_nan_backward(multivariate_mixed_dense):
+    model = multivariate_mixed_dense
     f = model.backward([[0, nan, 5, nan, 3], [nan, 4, 1, 5, 6], [4, nan, 2, nan, 1]])
     logp = numpy.array([[-58.07503525, -58.9897255, -59.68021202, -58.98706625, -59.66964241, -inf],
         [-51.03967719, -51.95596417, -52.64910927, -51.95596729, -52.64911135, -inf],
@@ -533,8 +516,8 @@
     assert_array_almost_equal(f, logp)
 
 
-@with_setup(setup_multivariate_gaussian_dense)
-def test_hmm_multivariate_gaussian_dense_nan_backward():
+def test_hmm_multivariate_gaussian_dense_nan_backward(multivariate_gaussian_dense):
+    model = multivariate_gaussian_dense
     f = model.backward([[0, nan, 5, nan, 3], [nan, 4, 1, 5, 6], [-4, nan, -2, nan, 1]])
     logp = numpy.array([[-50.72705835, -51.59493939, -52.26201549, -51.63477927, -52.28702405, -inf],
         [-37.00027105, -37.91654779, -38.60968708, -37.91655924, -38.60969427, -inf],
@@ -544,8 +527,8 @@
     assert_array_almost_equal(f, logp)
 
 
-@with_setup(setup_univariate_discrete_dense)
-def test_hmm_univariate_discrete_dense_predict_log_proba():
+def test_hmm_univariate_discrete_dense_predict_log_proba(univariate_discrete_dense):
+    model = univariate_discrete_dense
     f = model.predict_log_proba(['A', 'B', 'D', 'D', 'C'])
     logp = numpy.array([[-0.43598705, -3.09862324, -3.08461478, -1.33744712],
         [-2.75011524, -0.18557067, -3.32315748, -2.66771698],
@@ -556,8 +539,8 @@
     assert_array_almost_equal(f, logp)
 
 
-@with_setup(setup_univariate_gaussian_dense)
-def test_hmm_univariate_gaussian_dense_predict_log_proba():
+def test_hmm_univariate_gaussian_dense_predict_log_proba(univariate_gaussian_dense):
+    model = univariate_gaussian_dense
     f = model.predict_log_proba([3, 5, 8, 19, 13])
     logp = numpy.array([[-0.78887205, -0.60579496, -12.89687216, -335.62500351],
         [-0.00062775, -8.15629975, -7.98472576, -241.94669106],
@@ -568,8 +551,8 @@
     assert_array_almost_equal(f, logp)
 
 
-@with_setup(setup_univariate_poisson_dense)
-def test_hmm_univariate_poisson_dense_predict_log_proba():
+def test_hmm_univariate_poisson_dense_predict_log_proba(univariate_poisson_dense):
+    model = univariate_poisson_dense
     f = model.predict_log_proba([5, 8, 2, 4, 7, 8, 2])
     logp = numpy.array([[-4.5757441, -1.97785465, -5.3216135, -0.16670327],
         [-2.12263997, -0.57356459, -10.23962484, -1.14968683],
@@ -582,8 +565,8 @@
     assert_array_almost_equal(f, logp)
 
 
-@with_setup(setup_multivariate_mixed_dense)
-def test_hmm_multivariate_mixed_dense_predict_log_proba():
+def test_hmm_multivariate_mixed_dense_predict_log_proba(multivariate_mixed_dense):
+    model = multivariate_mixed_dense
     f = model.predict_log_proba([[0, 1, 5, 2, 3], [2, 4, 1, 5, 6], [4, 6, 2, 0, 1]])
     logp = numpy.array([[-1.3e-07, -32.35011618, -15.88142452, -48.83251208],
         [-0.0, -125.38948844, -20.25379271, -140.90667878],
@@ -591,8 +574,8 @@
 
     assert_array_almost_equal(f, logp)
 
-@with_setup(setup_multivariate_mixed_dense)
-def test_hmm_multivariate_mixed_dense_predict_log_proba_from_json():
+def test_hmm_multivariate_mixed_dense_predict_log_proba_from_json(multivariate_mixed_dense):
+    model = multivariate_mixed_dense
     logp = numpy.array([[-1.3e-07, -32.35011618, -15.88142452, -48.83251208],
         [-0.0, -125.38948844, -20.25379271, -140.90667878],
         [-3.29e-06, -178.10391312, -12.6258323, -138.86820172]])
@@ -602,8 +585,8 @@
     f = hmm_json.predict_log_proba(s)
     assert_array_almost_equal(f, logp)
 
-@with_setup(setup_multivariate_mixed_dense)
-def test_hmm_multivariate_mixed_dense_predict_log_proba_from_yaml():
+def test_hmm_multivariate_mixed_dense_predict_log_proba_from_yaml(multivariate_mixed_dense):
+    model = multivariate_mixed_dense
     logp = numpy.array([[-1.3e-07, -32.35011618, -15.88142452, -48.83251208],
         [-0.0, -125.38948844, -20.25379271, -140.90667878],
         [-3.29e-06, -178.10391312, -12.6258323, -138.86820172]])
@@ -613,8 +596,8 @@
     f = hmm_yaml.predict_log_proba(s)
     assert_array_almost_equal(f, logp)
 
-@with_setup(setup_multivariate_mixed_dense)
-def test_hmm_multivariate_mixed_dense_robust_from_json():
+def test_hmm_multivariate_mixed_dense_robust_from_json(multivariate_mixed_dense):
+    model = multivariate_mixed_dense
     logp = numpy.array([[-1.3e-07, -32.35011618, -15.88142452, -48.83251208],
         [-0.0, -125.38948844, -20.25379271, -140.90667878],
         [-3.29e-06, -178.10391312, -12.6258323, -138.86820172]])
@@ -624,8 +607,8 @@
     f = hmm_json.predict_log_proba(s)
     assert_array_almost_equal(f, logp)
 
-@with_setup(setup_multivariate_gaussian_dense)
-def test_hmm_multivariate_gaussian_dense_predict_log_proba():
+def test_hmm_multivariate_gaussian_dense_predict_log_proba(multivariate_gaussian_dense):
+    model = multivariate_gaussian_dense
     f = model.predict_log_proba([[0, 1, 5, 2, 3], [2, 4, 1, 5, 6], [-4, 6, -2, 0, 1]])
     logp = numpy.array([[-0.01065581, -8.64827112, -4.56452191, -11.62376426],
         [-3.5e-07, -21.03621875, -14.85696805, -21.01998258],
@@ -634,8 +617,8 @@
     assert_array_almost_equal(f, logp)
 
 
-@with_setup(setup_multivariate_gaussian_dense)
-def test_hmm_multivariate_gaussian_dense_predict_log_proba_from_json():
+def test_hmm_multivariate_gaussian_dense_predict_log_proba_from_json(multivariate_gaussian_dense):
+    model = multivariate_gaussian_dense
     logp = numpy.array([[-0.01065581, -8.64827112, -4.56452191, -11.62376426],
         [-3.5e-07, -21.03621875, -14.85696805, -21.01998258],
         [-18.86968176, -0.07127267, -3.75635416, -3.09173086]])
@@ -645,8 +628,8 @@
     f = hmm_json.predict_log_proba(s)
     assert_array_almost_equal(f, logp)
 
-@with_setup(setup_multivariate_gaussian_dense)
-def test_hmm_multivariate_gaussian_dense_predict_log_proba_from_yaml():
+def test_hmm_multivariate_gaussian_dense_predict_log_proba_from_yaml(multivariate_gaussian_dense):
+    model = multivariate_gaussian_dense
     logp = numpy.array([[-0.01065581, -8.64827112, -4.56452191, -11.62376426],
         [-3.5e-07, -21.03621875, -14.85696805, -21.01998258],
         [-18.86968176, -0.07127267, -3.75635416, -3.09173086]])
@@ -656,8 +639,8 @@
     f = hmm_yaml.predict_log_proba(s)
     assert_array_almost_equal(f, logp)
 
-@with_setup(setup_multivariate_gaussian_dense)
-def test_hmm_multivariate_gaussian_dense_robust_from_json():
+def test_hmm_multivariate_gaussian_dense_robust_from_json(multivariate_gaussian_dense):
+    model = multivariate_gaussian_dense
     logp = numpy.array([[-0.01065581, -8.64827112, -4.56452191, -11.62376426],
         [-3.5e-07, -21.03621875, -14.85696805, -21.01998258],
         [-18.86968176, -0.07127267, -3.75635416, -3.09173086]])
@@ -667,8 +650,8 @@
     f = hmm_json.predict_log_proba(s)
     assert_array_almost_equal(f, logp)
 
-@with_setup(setup_univariate_discrete_dense)
-def test_hmm_univariate_discrete_dense_nan_predict_log_proba():
+def test_hmm_univariate_discrete_dense_nan_predict_log_proba(univariate_discrete_dense):
+    model = univariate_discrete_dense
     f = model.predict_log_proba(['A', nan, 'D', nan, 'C'])
     logp = numpy.array([[-0.35115579, -2.8652756, -2.8084989, -1.72113484],
         [-1.05957958, -2.32005107, -1.66860008, -1.0034317],
@@ -679,8 +662,8 @@
     assert_array_almost_equal(f, logp)
 
 
-@with_setup(setup_univariate_gaussian_dense)
-def test_hmm_univariate_gaussian_dense_nan_predict_log_proba():
+def test_hmm_univariate_gaussian_dense_nan_predict_log_proba(univariate_gaussian_dense):
+    model = univariate_gaussian_dense
     f = model.predict_log_proba([3, 5, 8, nan, 13])
     logp = numpy.array([[-0.78873673, -0.60590764, -12.89720502, -335.62509553],
         [-0.00042349, -8.53359013, -8.39210104, -242.13279062],
@@ -691,8 +674,8 @@
     assert_array_almost_equal(f, logp)
 
 
-@with_setup(setup_univariate_poisson_dense)
-def test_hmm_univariate_poisson_dense_nan_predict_log_proba():
+def test_hmm_univariate_poisson_dense_nan_predict_log_proba(univariate_poisson_dense):
+    model = univariate_poisson_dense
     f = model.predict_log_proba([5, 8, 2, nan, 7, nan, 2])
     logp = numpy.array([[-4.57617316, -1.97796947, -5.32345238, -0.16666868],
         [-2.12417036, -0.56894186, -10.24308433, -1.15738144],
@@ -705,8 +688,8 @@
     assert_array_almost_equal(f, logp)
 
 
-@with_setup(setup_multivariate_mixed_dense)
-def test_hmm_multivariate_mixed_dense_nan_predict_log_proba():
+def test_hmm_multivariate_mixed_dense_nan_predict_log_proba(multivariate_mixed_dense):
+    model = multivariate_mixed_dense
     f = model.predict_log_proba([[0, nan, 5, nan, 3], [nan, 4, 1, 5, 6], [4, nan, 2, nan, 1]])
     logp = numpy.array([[-0.0158986, -4.14953409, -13.70253942, -42.20831658],
         [-4.8e-07, -124.2732402, -14.55816895, -115.82582217],
@@ -715,8 +698,8 @@
     assert_array_almost_equal(f, logp)
 
 
-@with_setup(setup_multivariate_gaussian_dense)
-def test_hmm_multivariate_gaussian_dense_nan_predict_log_proba():
+def test_hmm_multivariate_gaussian_dense_nan_predict_log_proba(multivariate_gaussian_dense):
+    model = multivariate_gaussian_dense
     f = model.predict_log_proba([[0, nan, 5, nan, 3], [nan, 4, 1, 5, 6], [-4, nan, -2, nan, 1]])
     logp = numpy.array([[-0.05507459, -6.68858877, -2.95060899, -10.33217406],
         [-2.76e-06, -16.54254089, -12.82407236, -19.35159412],
@@ -725,8 +708,8 @@
     assert_array_almost_equal(f, logp)
 
 
-@with_setup(setup_univariate_discrete_dense)
-def test_hmm_univariate_discrete_dense_predict_proba():
+def test_hmm_univariate_discrete_dense_predict_proba(univariate_discrete_dense):
+    model = univariate_discrete_dense
     f = model.predict_proba(['A', 'B', 'D', 'D', 'C'])
     logp = numpy.array([[0.6466261, 0.04511127, 0.04574765, 0.26251498],
         [0.06392049, 0.83063013, 0.03603886, 0.06941051],
@@ -737,8 +720,8 @@
     assert_array_almost_equal(f, logp)
 
 
-@with_setup(setup_univariate_gaussian_dense)
-def test_hmm_univariate_gaussian_dense_predict_proba():
+def test_hmm_univariate_gaussian_dense_predict_proba(univariate_gaussian_dense):
+    model = univariate_gaussian_dense
     f = model.predict_proba([3, 5, 8, 19, 13])
     logp = numpy.array([[0.454357, 0.54564049, 2.51e-06, 0.0],
         [0.99937245, 0.00028692, 0.00034063, 0.0],
@@ -749,8 +732,8 @@
     assert_array_almost_equal(f, logp)
 
 
-@with_setup(setup_univariate_poisson_dense)
-def test_hmm_univariate_poisson_dense_predict_proba():
+def test_hmm_univariate_poisson_dense_predict_proba(univariate_poisson_dense):
+    model = univariate_poisson_dense
     f = model.predict_proba([5, 8, 2, 4, 7, 8, 2])
     logp = numpy.array([[0.01029863, 0.13836576, 0.00488487, 0.84645074],
         [0.11971517, 0.56351316, 3.573e-05, 0.31673595],
@@ -763,8 +746,8 @@
     assert_array_almost_equal(f, logp)
 
 
-@with_setup(setup_multivariate_mixed_dense)
-def test_hmm_multivariate_mixed_dense_predict_proba():
+def test_hmm_multivariate_mixed_dense_predict_proba(multivariate_mixed_dense):
+    model = multivariate_mixed_dense
     f = model.predict_proba([[0, 1, 5, 2, 3], [2, 4, 1, 5, 6], [4, 6, 2, 0, 1]])
     logp = numpy.array([[0.99999987, 0.0, 1.3e-07, 0.0],
         [1.0, 0.0, 0.0, 0.0],
@@ -772,8 +755,8 @@
 
     assert_array_almost_equal(f, logp)
 
-@with_setup(setup_multivariate_gaussian_dense)
-def test_hmm_multivariate_gaussian_dense_predict_proba():
+def test_hmm_multivariate_gaussian_dense_predict_proba(multivariate_gaussian_dense):
+    model = multivariate_gaussian_dense
     f = model.predict_proba([[0, 1, 5, 2, 3], [2, 4, 1, 5, 6], [-4, 6, -2, 0, 1]])
     logp = numpy.array([[0.98940076, 0.00017543, 0.01041486, 8.95e-06],
         [0.99999965, 0.0, 3.5e-07, 0.0],
@@ -782,8 +765,8 @@
     assert_array_almost_equal(f, logp)
 
 
-@with_setup(setup_univariate_discrete_dense)
-def test_hmm_univariate_discrete_dense_nan_predict_proba():
+def test_hmm_univariate_discrete_dense_nan_predict_proba(univariate_discrete_dense):
+    model = univariate_discrete_dense
     f = model.predict_proba(['A', nan, 'D', nan, 'C'])
     logp = numpy.array([[0.70387409, 0.05696743, 0.06029543, 0.17886305],
         [0.3466015, 0.09826857, 0.18851078, 0.36661915],
@@ -794,8 +777,8 @@
     assert_array_almost_equal(f, logp)
 
 
-@with_setup(setup_univariate_gaussian_dense)
-def test_hmm_univariate_gaussian_dense_nan_predict_proba():
+def test_hmm_univariate_gaussian_dense_nan_predict_proba(univariate_gaussian_dense):
+    model = univariate_gaussian_dense
     f = model.predict_proba([3, 5, 8, nan, 13])
     logp = numpy.array([[0.45441848, 0.54557901, 2.51e-06, 0.0],
         [0.9995766, 0.00019675, 0.00022665, 0.0],
@@ -806,8 +789,8 @@
     assert_array_almost_equal(f, logp)
 
 
-@with_setup(setup_univariate_poisson_dense)
-def test_hmm_univariate_poisson_dense_nan_predict_proba():
+def test_hmm_univariate_poisson_dense_nan_predict_proba(univariate_poisson_dense):
+    model = univariate_poisson_dense
     f = model.predict_proba([5, 8, 2, nan, 7, nan, 2])
     logp = numpy.array([[0.01029422, 0.13834988, 0.00487589, 0.84648002],
         [0.1195321, 0.56612416, 3.56e-05, 0.31430814],
@@ -820,8 +803,8 @@
     assert_array_almost_equal(f, logp)
 
 
-@with_setup(setup_multivariate_mixed_dense)
-def test_hmm_multivariate_mixed_dense_nan_predict_proba():
+def test_hmm_multivariate_mixed_dense_nan_predict_proba(multivariate_mixed_dense):
+    model = multivariate_mixed_dense
     f = model.predict_proba([[0, nan, 5, nan, 3], [nan, 4, 1, 5, 6], [4, nan, 2, nan, 1]])
     logp = numpy.array([[0.98422712, 0.01577176, 1.12e-06, 0.0],
         [0.99999952, 0.0, 4.8e-07, 0.0],
@@ -830,8 +813,8 @@
     assert_array_almost_equal(f, logp)
 
 
-@with_setup(setup_multivariate_gaussian_dense)
-def test_hmm_multivariate_gaussian_dense_nan_predict_proba():
+def test_hmm_multivariate_gaussian_dense_nan_predict_proba(multivariate_gaussian_dense):
+    model = multivariate_gaussian_dense
     f = model.predict_proba([[0, nan, 5, nan, 3], [nan, 4, 1, 5, 6], [-4, nan, -2, nan, 1]])
     logp = numpy.array([[0.94641455, 0.00124504, 0.05230784, 3.257e-05],
         [0.99999724, 7e-08, 2.7e-06, 0.0],
@@ -840,119 +823,119 @@
     assert_array_almost_equal(f, logp)
 
 
-@with_setup(setup_univariate_discrete_dense)
-def test_hmm_univariate_discrete_dense_predict():
+def test_hmm_univariate_discrete_dense_predict(univariate_discrete_dense):
+    model = univariate_discrete_dense
     f = model.predict(['A', 'B', 'D', 'D', 'C'])
     path = [0, 1, 3, 3, 2]
 
     assert_array_almost_equal(f, path)
 
 
-@with_setup(setup_univariate_gaussian_dense)
-def test_hmm_univariate_gaussian_dense_predict():
+def test_hmm_univariate_gaussian_dense_predict(univariate_gaussian_dense):
+    model = univariate_gaussian_dense
     f = model.predict([3, 5, 8, 19, 13])
     path = [1, 0, 2, 2, 2]
 
     assert_array_almost_equal(f, path)
 
 
-@with_setup(setup_univariate_poisson_dense)
-def test_hmm_univariate_poisson_dense_predict():
+def test_hmm_univariate_poisson_dense_predict(univariate_poisson_dense):
+    model = univariate_poisson_dense
     f = model.predict([5, 8, 2, 4, 7, 8, 2])
     path = [3, 1, 2, 3, 3, 1, 2]
 
     assert_array_almost_equal(f, path)
 
 
-@with_setup(setup_multivariate_mixed_dense)
-def test_hmm_multivariate_mixed_dense_predict():
+def test_hmm_multivariate_mixed_dense_predict(multivariate_mixed_dense):
+    model = multivariate_mixed_dense
     f = model.predict([[0, 1, 5, 2, 3], [2, 4, 1, 5, 6], [4, 6, 2, 0, 1]])
     path = [0, 0, 0]
 
     assert_array_almost_equal(f, path)
 
-@with_setup(setup_multivariate_gaussian_dense)
-def test_hmm_multivariate_gaussian_dense_predict():
+def test_hmm_multivariate_gaussian_dense_predict(multivariate_gaussian_dense):
+    model = multivariate_gaussian_dense
     f = model.predict([[0, 1, 5, 2, 3], [2, 4, 1, 5, 6], [-4, 6, -2, 0, 1]])
     path = [0, 0, 1]
 
     assert_array_almost_equal(f, path)
 
 
-@with_setup(setup_univariate_discrete_dense)
-def test_hmm_univariate_discrete_dense_nan_predict():
+def test_hmm_univariate_discrete_dense_nan_predict(univariate_discrete_dense):
+    model = univariate_discrete_dense
     f = model.predict(['A', nan, 'D', nan, 'C'])
     path = [0, 3, 3, 1, 2]
 
     assert_array_almost_equal(f, path)
 
 
-@with_setup(setup_univariate_gaussian_dense)
-def test_hmm_univariate_gaussian_dense_nan_predict():
+def test_hmm_univariate_gaussian_dense_nan_predict(univariate_gaussian_dense):
+    model = univariate_gaussian_dense
     f = model.predict([3, 5, 8, nan, 13])
     path = [1, 0, 0, 2, 2]
 
     assert_array_almost_equal(f, path)
 
 
-@with_setup(setup_univariate_poisson_dense)
-def test_hmm_univariate_poisson_dense_nan_predict():
+def test_hmm_univariate_poisson_dense_nan_predict(univariate_poisson_dense):
+    model = univariate_poisson_dense
     f = model.predict([5, 8, 2, nan, 7, nan, 2])
     path = [3, 1, 2, 3, 3, 3, 2]
 
     assert_array_almost_equal(f, path)
 
 
-@with_setup(setup_multivariate_mixed_dense)
-def test_hmm_multivariate_mixed_dense_nan_predict():
+def test_hmm_multivariate_mixed_dense_nan_predict(multivariate_mixed_dense):
+    model = multivariate_mixed_dense
     f = model.predict([[0, nan, 5, nan, 3], [nan, 4, 1, 5, 6], [4, nan, 2, nan, 1]])
     path = [0, 0, 0]
 
     assert_array_almost_equal(f, path)
 
 
-@with_setup(setup_multivariate_gaussian_dense)
-def test_hmm_multivariate_gaussian_dense_nan_predict():
+def test_hmm_multivariate_gaussian_dense_nan_predict(multivariate_gaussian_dense):
+    model = multivariate_gaussian_dense
     f = model.predict([[0, nan, 5, nan, 3], [nan, 4, 1, 5, 6], [-4, nan, -2, nan, 1]])
     path = [0, 0, 1]
 
     assert_array_almost_equal(f, path)
 
 
-@with_setup(setup_univariate_discrete_dense)
-def test_hmm_univariate_discrete_dense_predict_viterbi():
+def test_hmm_univariate_discrete_dense_predict_viterbi(univariate_discrete_dense):
+    model = univariate_discrete_dense
     f = model.predict(['A', 'B', 'D', 'D', 'C'], algorithm='viterbi')
     path = [4, 0, 1, 3, 3, 2, 5]
 
     assert_array_almost_equal(f, path)
 
 
-@with_setup(setup_univariate_gaussian_dense)
-def test_hmm_univariate_gaussian_dense_predict_viterbi():
+def test_hmm_univariate_gaussian_dense_predict_viterbi(univariate_gaussian_dense):
+    model = univariate_gaussian_dense
     f = model.predict([3, 5, 8, 19, 13], algorithm='viterbi')
     path = [4, 1, 0, 2, 2, 2, 5]
 
     assert_array_almost_equal(f, path)
 
 
-@with_setup(setup_univariate_poisson_dense)
-def test_hmm_univariate_poisson_dense_predict_viterbi():
+def test_hmm_univariate_poisson_dense_predict_viterbi(univariate_poisson_dense):
+    model = univariate_poisson_dense
     f = model.predict([5, 8, 2, 4, 7, 8, 2], algorithm='viterbi')
     path = [4, 3, 1, 2, 3, 3, 1, 2, 5]
 
     assert_array_almost_equal(f, path)
 
 
-@with_setup(setup_multivariate_mixed_dense)
-def test_hmm_multivariate_mixed_dense_predict_viterbi():
+def test_hmm_multivariate_mixed_dense_predict_viterbi(multivariate_mixed_dense):
+    model = multivariate_mixed_dense
     f = model.predict([[0, 1, 5, 2, 3], [2, 4, 1, 5, 6], [4, 6, 2, 0, 1]],
         algorithm='viterbi')
     path = [4, 0, 0, 0, 5]
 
     assert_array_almost_equal(f, path)
 
-@with_setup(setup_multivariate_gaussian_dense)
-def test_hmm_multivariate_gaussian_dense_predict_viterbi():
+def test_hmm_multivariate_gaussian_dense_predict_viterbi(multivariate_gaussian_dense):
+    model = multivariate_gaussian_dense
     f = model.predict([[0, 1, 5, 2, 3], [2, 4, 1, 5, 6], [-4, 6, -2, 0, 1]],
         algorithm='viterbi')
     path = [4, 0, 0, 1, 5]
@@ -960,32 +943,32 @@
     assert_array_almost_equal(f, path)
 
 
-@with_setup(setup_univariate_discrete_dense)
-def test_hmm_univariate_discrete_dense_nan_predict_viterbi():
+def test_hmm_univariate_discrete_dense_nan_predict_viterbi(univariate_discrete_dense):
+    model = univariate_discrete_dense
     f = model.predict(['A', nan, 'D', nan, 'C'], algorithm='viterbi')
     path = [4, 0, 0, 3, 1, 2, 5]
 
     assert_array_almost_equal(f, path)
 
 
-@with_setup(setup_univariate_gaussian_dense)
-def test_hmm_univariate_gaussian_dense_nan_predict_viterbi():
+def test_hmm_univariate_gaussian_dense_nan_predict_viterbi(univariate_gaussian_dense):
+    model = univariate_gaussian_dense
     f = model.predict([3, 5, 8, nan, 13], algorithm='viterbi')
     path = [4, 1, 0, 0, 0, 2, 5]
 
     assert_array_almost_equal(f, path)
 
 
-@with_setup(setup_univariate_poisson_dense)
-def test_hmm_univariate_poisson_dense_nan_predict_viterbi():
+def test_hmm_univariate_poisson_dense_nan_predict_viterbi(univariate_poisson_dense):
+    model = univariate_poisson_dense
     f = model.predict([5, 8, 2, nan, 7, nan, 2], algorithm='viterbi')
     path = [4, 3, 1, 2, 3, 3, 1, 2, 5]
 
     assert_array_almost_equal(f, path)
 
 
-@with_setup(setup_multivariate_mixed_dense)
-def test_hmm_multivariate_mixed_dense_nan_predict_viterbi():
+def test_hmm_multivariate_mixed_dense_nan_predict_viterbi(multivariate_mixed_dense):
+    model = multivariate_mixed_dense
     f = model.predict([[0, nan, 5, nan, 3], [nan, 4, 1, 5, 6], [4, nan, 2, nan, 1]],
         algorithm='viterbi')
     path = [4, 0, 0, 0, 5]
@@ -993,8 +976,8 @@
     assert_array_almost_equal(f, path)
 
 
-@with_setup(setup_multivariate_gaussian_dense)
-def test_hmm_multivariate_gaussian_dense_nan_predict_viterbi():
+def test_hmm_multivariate_gaussian_dense_nan_predict_viterbi(multivariate_gaussian_dense):
+    model = multivariate_gaussian_dense
     f = model.predict([[0, nan, 5, nan, 3], [nan, 4, 1, 5, 6], [-4, nan, -2, nan, 1]],
         algorithm='viterbi')
     path = [4, 0, 0, 1, 5]
@@ -1002,8 +985,7 @@
     assert_array_almost_equal(f, path)
 
 
-@with_setup(setup, teardown)
-def test_hmm_viterbi_fit():
+def test_hmm_viterbi_fit(model):
     seqs = [list(x) for x in ['ACT', 'ACT', 'ACC', 'ACTC', 'ACT', 'ACT', 'CCT',
     'CCC', 'AAT', 'CT', 'AT', 'CT', 'CT', 'CT', 'CT', 'CT', 'CT',
     'ACT', 'ACT', 'CT', 'ACT', 'CT', 'CT', 'CT', 'CT']]
@@ -1015,11 +997,10 @@
                                use_pseudocount=True)
 
     total_improvement = history.total_improvement[-1]
-    assert_equal(round(total_improvement, 4), 83.2834)
+    assert round(total_improvement, 4) == 83.2834
 
 
-@with_setup(setup, teardown)
-def test_hmm_viterbi_fit_no_pseudocount():
+def test_hmm_viterbi_fit_no_pseudocount(model):
     seqs = [list(x) for x in ['ACT', 'ACT', 'ACC', 'ACTC', 'ACT', 'ACT', 'CCT',
     'CCC', 'AAT', 'CT', 'AT', 'CT', 'CT', 'CT', 'CT', 'CT', 'CT',
     'ACT', 'ACT', 'CT', 'ACT', 'CT', 'CT', 'CT', 'CT']]
@@ -1031,11 +1012,10 @@
                                      use_pseudocount=False)
 
     total_improvement = history.total_improvement[-1]
-    assert_equal(round(total_improvement, 4), 84.9318)
+    assert round(total_improvement, 4) == 84.9318
 
 
-@with_setup(setup, teardown)
-def test_hmm_viterbi_fit_w_pseudocount():
+def test_hmm_viterbi_fit_w_pseudocount(model):
     seqs = [list(x) for x in ['ACT', 'ACT', 'ACC', 'ACTC', 'ACT', 'ACT', 'CCT',
     'CCC', 'AAT', 'CT', 'AT', 'CT', 'CT', 'CT', 'CT', 'CT', 'CT',
     'ACT', 'ACT', 'CT', 'ACT', 'CT', 'CT', 'CT', 'CT']]
@@ -1047,11 +1027,10 @@
                                      transition_pseudocount=1.)
 
     total_improvement = history.total_improvement[-1]
-    assert_equal(round(total_improvement, 4), 79.4713)
+    assert round(total_improvement, 4) == 79.4713
 
 
-@with_setup(setup, teardown)
-def test_hmm_viterbi_fit_w_pseudocount_priors():
+def test_hmm_viterbi_fit_w_pseudocount_priors(model):
     seqs = [list(x) for x in ['ACT', 'ACT', 'ACC', 'ACTC', 'ACT', 'ACT', 'CCT',
     'CCC', 'AAT', 'CT', 'AT', 'CT', 'CT', 'CT', 'CT', 'CT', 'CT',
     'ACT', 'ACT', 'CT', 'ACT', 'CT', 'CT', 'CT', 'CT']]
@@ -1064,11 +1043,10 @@
                                      use_pseudocount=True)
 
     total_improvement = history.total_improvement[-1]
-    assert_equal(round(total_improvement, 4), 81.7439)
+    assert round(total_improvement, 4) == 81.7439
 
 
-@with_setup(setup, teardown)
-def test_hmm_viterbi_fit_w_inertia():
+def test_hmm_viterbi_fit_w_inertia(model):
     seqs = [list(x) for x in ['ACT', 'ACT', 'ACC', 'ACTC', 'ACT', 'ACT', 'CCT',
     'CCC', 'AAT', 'CT', 'AT', 'CT', 'CT', 'CT', 'CT', 'CT', 'CT',
     'ACT', 'ACT', 'CT', 'ACT', 'CT', 'CT', 'CT', 'CT']]
@@ -1080,11 +1058,10 @@
                                      edge_inertia=0.193)
 
     total_improvement = history.total_improvement[-1]
-    assert_equal(round(total_improvement, 4), 84.9318)
+    assert round(total_improvement, 4) == 84.9318
 
 
-@with_setup(setup, teardown)
-def test_hmm_viterbi_fit_w_inertia2():
+def test_hmm_viterbi_fit_w_inertia2(model):
     seqs = [list(x) for x in ['ACT', 'ACT', 'ACC', 'ACTC', 'ACT', 'ACT', 'CCT',
     'CCC', 'AAT', 'CT', 'AT', 'CT', 'CT', 'CT', 'CT', 'CT', 'CT',
     'ACT', 'ACT', 'CT', 'ACT', 'CT', 'CT', 'CT', 'CT']]
@@ -1096,11 +1073,10 @@
                                      edge_inertia=0.82)
 
     total_improvement = history.total_improvement[-1]
-    assert_equal(round(total_improvement, 4), 84.9318)
+    assert round(total_improvement, 4) == 84.9318
 
 
-@with_setup(setup, teardown)
-def test_hmm_viterbi_fit_w_pseudocount_inertia():
+def test_hmm_viterbi_fit_w_pseudocount_inertia(model):
     seqs = [list(x) for x in ['ACT', 'ACT', 'ACC', 'ACTC', 'ACT', 'ACT', 'CCT',
     'CCC', 'AAT', 'CT', 'AT', 'CT', 'CT', 'CT', 'CT', 'CT', 'CT',
     'ACT', 'ACT', 'CT', 'ACT', 'CT', 'CT', 'CT', 'CT']]
@@ -1113,10 +1089,9 @@
                                      use_pseudocount=True)
 
     total_improvement = history.total_improvement[-1]
-    assert_equal(round(total_improvement, 4), 83.2834)
+    assert round(total_improvement, 4) == 83.2834
 
-@with_setup(setup, teardown)
-def test_hmm_viterbi_fit_one_check_input():
+def test_hmm_viterbi_fit_one_check_input(model):
     seqs = [list(x) for x in ['ACT', 'ACT', 'ACC', 'ACTC', 'ACT', 'ACT', 'CCT',
     'CCC', 'AAT', 'CT', 'AT', 'CT', 'CT', 'CT', 'CT', 'CT', 'CT',
     'ACT', 'ACT', 'CT', 'ACT', 'CT', 'CT', 'CT', 'CT']]
@@ -1129,10 +1104,9 @@
                                multiple_check_input=False)
 
     total_improvement = history.total_improvement[-1]
-    assert_equal(round(total_improvement, 4), 83.2834)
+    assert round(total_improvement, 4) == 83.2834
 
-@with_setup(setup, teardown)
-def test_hmm_bw_fit():
+def test_hmm_bw_fit(model):
     seqs = [list(x) for x in ['ACT', 'ACT', 'ACC', 'ACTC', 'ACT', 'ACT', 'CCT',
         'CCC', 'AAT', 'CT', 'AT', 'CT', 'CT', 'CT', 'CT', 'CT', 'CT',
         'ACT', 'ACT', 'CT', 'ACT', 'CT', 'CT', 'CT', 'CT']]
@@ -1145,11 +1119,11 @@
                                      max_iterations=5)
 
     total_improvement = history.total_improvement[-1]
-    assert_equal(round(total_improvement, 4), 83.1132)
+    assert round(total_improvement, 4) == 83.1132
 
 
-@with_setup(setup_multivariate_discrete_sparse, teardown)
-def test_hmm_bw_multivariate_discrete_fit():
+def test_hmm_bw_multivariate_discrete_fit(multivariate_discrete_sparse):
+    model = multivariate_discrete_sparse
     seqs = [[['A', 'A'], ['A', 'C'], ['C', 'T']], [['A', 'A'], ['C', 'C'], ['T', 'T']],
             [['A', 'A'], ['A', 'C'], ['C', 'C'], ['T', 'T']], [['A', 'A'], ['C', 'C']]]
 
@@ -1161,11 +1135,11 @@
                                      max_iterations=5)
 
     total_improvement = history.total_improvement[-1]
-    assert_equal(round(total_improvement, 4), 13.3622)
+    assert round(total_improvement, 4) == 13.3622
 
 
-@with_setup(setup_multivariate_discrete_sparse, teardown)
-def test_hmm_bw_multivariate_discrete_fit_json_yaml():
+def test_hmm_bw_multivariate_discrete_fit_json_yaml(multivariate_discrete_sparse):
+    model = multivariate_discrete_sparse
     seqs = [[['A', 'A'], ['A', 'C'], ['C', 'T']], [['A', 'A'], ['C', 'C'], ['T', 'T']],
             [['A', 'A'], ['A', 'C'], ['C', 'C'], ['T', 'T']], [['A', 'A'], ['C', 'C']]]
 
@@ -1178,10 +1152,10 @@
                                      max_iterations=5)
 
     total_improvement = history.total_improvement[-1]
-    assert_equal(round(total_improvement, 4), 13.3622)
+    assert round(total_improvement, 4) == 13.3622
 
-@with_setup(setup_multivariate_discrete_sparse, teardown)
-def test_hmm_bw_multivariate_discrete_fit_robust_from_json():
+def test_hmm_bw_multivariate_discrete_fit_robust_from_json(multivariate_discrete_sparse):
+    model = multivariate_discrete_sparse
     seqs = [[['A', 'A'], ['A', 'C'], ['C', 'T']], [['A', 'A'], ['C', 'C'], ['T', 'T']],
             [['A', 'A'], ['A', 'C'], ['C', 'C'], ['T', 'T']], [['A', 'A'], ['C', 'C']]]
 
@@ -1194,10 +1168,10 @@
                                      max_iterations=5)
 
     total_improvement = history.total_improvement[-1]
-    assert_equal(round(total_improvement, 4), 13.3622)
+    assert round(total_improvement, 4) == 13.3622
 
-@with_setup(setup_multivariate_discrete_sparse, teardown)
-def test_hmm_bw_multivariate_discrete_fit_from_yaml():
+def test_hmm_bw_multivariate_discrete_fit_from_yaml(multivariate_discrete_sparse):
+    model = multivariate_discrete_sparse
     seqs = [[['A', 'A'], ['A', 'C'], ['C', 'T']], [['A', 'A'], ['C', 'C'], ['T', 'T']],
             [['A', 'A'], ['A', 'C'], ['C', 'C'], ['T', 'T']], [['A', 'A'], ['C', 'C']]]
 
@@ -1210,11 +1184,11 @@
                               max_iterations=5)
 
     total_improvement = history.total_improvement[-1]
-    assert_equal(round(total_improvement, 4), 13.3622)
+    assert round(total_improvement, 4) == 13.3622
 
 
-@with_setup(setup_multivariate_gaussian_sparse, teardown)
-def test_hmm_bw_multivariate_gaussian_fit():
+def test_hmm_bw_multivariate_gaussian_fit(multivariate_gaussian_sparse):
+    model = multivariate_gaussian_sparse
     seqs = [[[5, 8], [8, 10], [13, 17], [-3, -4]], [[6, 7], [13, 16], [12, 11], [-6, -7]],
             [[4, 6], [13, 15], [-4, -7]], [[6, 5], [14, 18], [-7, -5]]]
 
@@ -1226,10 +1200,10 @@
                                      max_iterations=5)
 
     total_improvement = history.total_improvement[-1]
-    assert_equal(round(total_improvement, 4), 24.7013)
+    assert round(total_improvement, 4) == 24.7013
 
-@with_setup(setup_multivariate_gaussian_sparse, teardown)
-def test_hmm_bw_multivariate_gaussian_from_json():
+def test_hmm_bw_multivariate_gaussian_from_json(multivariate_gaussian_sparse):
+    model = multivariate_gaussian_sparse
     seqs = [[[5, 8], [8, 10], [13, 17], [-3, -4]], [[6, 7], [13, 16], [12, 11], [-6, -7]],
             [[4, 6], [13, 15], [-4, -7]], [[6, 5], [14, 18], [-7, -5]]]
 
@@ -1242,10 +1216,10 @@
                                      max_iterations=5)
 
     total_improvement = history.total_improvement[-1]
-    assert_equal(round(total_improvement, 4), 24.7013)
+    assert round(total_improvement, 4) == 24.7013
 
-@with_setup(setup_multivariate_gaussian_sparse, teardown)
-def test_hmm_bw_multivariate_gaussian_robust_from_json():
+def test_hmm_bw_multivariate_gaussian_robust_from_json(multivariate_gaussian_sparse):
+    model = multivariate_gaussian_sparse
     seqs = [[[5, 8], [8, 10], [13, 17], [-3, -4]], [[6, 7], [13, 16], [12, 11], [-6, -7]],
             [[4, 6], [13, 15], [-4, -7]], [[6, 5], [14, 18], [-7, -5]]]
 
@@ -1258,10 +1232,10 @@
                                      max_iterations=5)
 
     total_improvement = history.total_improvement[-1]
-    assert_equal(round(total_improvement, 4), 24.7013)
+    assert round(total_improvement, 4) == 24.7013
 
-@with_setup(setup_multivariate_gaussian_sparse, teardown)
-def test_hmm_bw_multivariate_gaussian_from_yaml(): 
+def test_hmm_bw_multivariate_gaussian_from_yaml(multivariate_gaussian_sparse):
+    model = multivariate_gaussian_sparse
     seqs = [[[5, 8], [8, 10], [13, 17], [-3, -4]], [[6, 7], [13, 16], [12, 11], [-6, -7]],
             [[4, 6], [13, 15], [-4, -7]], [[6, 5], [14, 18], [-7, -5]]]
             
@@ -1274,10 +1248,9 @@
                               max_iterations=5)
 
     total_improvement = history.total_improvement[-1]
-    assert_equal(round(total_improvement, 4), 24.7013)
+    assert round(total_improvement, 4) == 24.7013
 
-@with_setup(setup, teardown)
-def test_hmm_bw_fit_json():
+def test_hmm_bw_fit_json(model):
     seqs = [list(x) for x in ['ACT', 'ACT', 'ACC', 'ACTC', 'ACT', 'ACT', 'CCT',
         'CCC', 'AAT', 'CT', 'AT', 'CT', 'CT', 'CT', 'CT', 'CT', 'CT',
         'ACT', 'ACT', 'CT', 'ACT', 'CT', 'CT', 'CT', 'CT']]
@@ -1291,14 +1264,13 @@
 
     total_improvement = history.total_improvement[-1]
 
-    assert_equal(round(total_improvement, 4), 83.1132)
+    assert round(total_improvement, 4) == 83.1132
     assert_almost_equal(sum(model.log_probability(seq) for seq in seqs), -42.2341, 4)
 
     hmm = HiddenMarkovModel.from_json(model.to_json())
     assert_almost_equal(sum(model.log_probability(seq) for seq in seqs), -42.2341, 4)
 
-@with_setup(setup, teardown)
-def test_hmm_bw_fit_robust_from_json():
+def test_hmm_bw_fit_robust_from_json(model):
     seqs = [list(x) for x in ['ACT', 'ACT', 'ACC', 'ACTC', 'ACT', 'ACT', 'CCT',
         'CCC', 'AAT', 'CT', 'AT', 'CT', 'CT', 'CT', 'CT', 'CT', 'CT',
         'ACT', 'ACT', 'CT', 'ACT', 'CT', 'CT', 'CT', 'CT']]
@@ -1312,14 +1284,13 @@
 
     total_improvement = history.total_improvement[-1]
 
-    assert_equal(round(total_improvement, 4), 83.1132)
+    assert round(total_improvement, 4) == 83.1132
     assert_almost_equal(sum(model.log_probability(seq) for seq in seqs), -42.2341, 4)
 
     hmm = from_json(model.to_json())
     assert_almost_equal(sum(model.log_probability(seq) for seq in seqs), -42.2341, 4)
 
-@with_setup(setup, teardown)
-def test_hmm_bw_fit_no_pseudocount():
+def test_hmm_bw_fit_no_pseudocount(model):
     seqs = [list(x) for x in ['ACT', 'ACT', 'ACC', 'ACTC', 'ACT', 'ACT', 'CCT',
         'CCC', 'AAT', 'CT', 'AT', 'CT', 'CT', 'CT', 'CT', 'CT', 'CT',
         'ACT', 'ACT', 'CT', 'ACT', 'CT', 'CT', 'CT', 'CT']]
@@ -1332,11 +1303,10 @@
                                      max_iterations=5)
 
     total_improvement = history.total_improvement[-1]
-    assert_equal(round(total_improvement, 4), 85.681)
+    assert round(total_improvement, 4) == 85.681
 
 
-@with_setup(setup, teardown)
-def test_hmm_bw_fit_w_pseudocount():
+def test_hmm_bw_fit_w_pseudocount(model):
     seqs = [list(x) for x in ['ACT', 'ACT', 'ACC', 'ACTC', 'ACT', 'ACT', 'CCT',
         'CCC', 'AAT', 'CT', 'AT', 'CT', 'CT', 'CT', 'CT', 'CT', 'CT',
         'ACT', 'ACT', 'CT', 'ACT', 'CT', 'CT', 'CT', 'CT']]
@@ -1349,11 +1319,10 @@
                                      max_iterations=5)
 
     total_improvement = history.total_improvement[-1]
-    assert_equal(round(total_improvement, 4), 84.9408)
+    assert round(total_improvement, 4) == 84.9408
 
 
-@with_setup(setup, teardown)
-def test_hmm_bw_fit_w_pseudocount_priors():
+def test_hmm_bw_fit_w_pseudocount_priors(model):
     seqs = [list(x) for x in ['ACT', 'ACT', 'ACC', 'ACTC', 'ACT', 'ACT', 'CCT',
         'CCC', 'AAT', 'CT', 'AT', 'CT', 'CT', 'CT', 'CT', 'CT', 'CT',
         'ACT', 'ACT', 'CT', 'ACT', 'CT', 'CT', 'CT', 'CT']]
@@ -1367,11 +1336,10 @@
                                      max_iterations=5)
 
     total_improvement = history.total_improvement[-1]
-    assert_equal(round(total_improvement, 4), 81.2265)
+    assert round(total_improvement, 4) == 81.2265
 
 
-@with_setup(setup, teardown)
-def test_hmm_bw_fit_w_inertia():
+def test_hmm_bw_fit_w_inertia(model):
     seqs = [list(x) for x in ['ACT', 'ACT', 'ACC', 'ACTC', 'ACT', 'ACT', 'CCT',
         'CCC', 'AAT', 'CT', 'AT', 'CT', 'CT', 'CT', 'CT', 'CT', 'CT',
         'ACT', 'ACT', 'CT', 'ACT', 'CT', 'CT', 'CT', 'CT']]
@@ -1384,11 +1352,10 @@
                                      max_iterations=5)
 
     total_improvement = history.total_improvement[-1]
-    assert_equal(round(total_improvement, 4), 85.0528)
+    assert round(total_improvement, 4) == 85.0528
 
 
-@with_setup(setup, teardown)
-def test_hmm_bw_fit_w_inertia2():
+def test_hmm_bw_fit_w_inertia2(model):
     seqs = [list(x) for x in ['ACT', 'ACT', 'ACC', 'ACTC', 'ACT', 'ACT', 'CCT',
         'CCC', 'AAT', 'CT', 'AT', 'CT', 'CT', 'CT', 'CT', 'CT', 'CT',
         'ACT', 'ACT', 'CT', 'ACT', 'CT', 'CT', 'CT', 'CT']]
@@ -1401,11 +1368,10 @@
                                      max_iterations=5)
 
     total_improvement = history.total_improvement[-1]
-    assert_equal(round(total_improvement, 4), 72.5134)
+    assert round(total_improvement, 4) == 72.5134
 
 
-@with_setup(setup, teardown)
-def test_hmm_bw_fit_w_pseudocount_inertia():
+def test_hmm_bw_fit_w_pseudocount_inertia(model):
     seqs = [list(x) for x in ['ACT', 'ACT', 'ACC', 'ACTC', 'ACT', 'ACT', 'CCT',
         'CCC', 'AAT', 'CT', 'AT', 'CT', 'CT', 'CT', 'CT', 'CT', 'CT',
         'ACT', 'ACT', 'CT', 'ACT', 'CT', 'CT', 'CT', 'CT']]
@@ -1419,11 +1385,10 @@
                                      max_iterations=5)
 
     total_improvement = history.total_improvement[-1]
-    assert_equal(round(total_improvement, 4), 83.0764)
+    assert round(total_improvement, 4) == 83.0764
 
 
-@with_setup(setup, teardown)
-def test_hmm_bw_fit_w_frozen_distributions():
+def test_hmm_bw_fit_w_frozen_distributions(model):
     seqs = [list(x) for x in ['ACT', 'ACT', 'ACC', 'ACTC', 'ACT', 'ACT', 'CCT',
         'CCC', 'AAT', 'CT', 'AT', 'CT', 'CT', 'CT', 'CT', 'CT', 'CT',
         'ACT', 'ACT', 'CT', 'ACT', 'CT', 'CT', 'CT', 'CT']]
@@ -1436,11 +1401,10 @@
                                      max_iterations=5)
 
     total_improvement = history.total_improvement[-1]
-    assert_equal(round(total_improvement, 4), 64.474)
+    assert round(total_improvement, 4) == 64.474
 
 
-@with_setup(setup, teardown)
-def test_hmm_bw_fit_w_frozen_edges():
+def test_hmm_bw_fit_w_frozen_edges(model):
     seqs = [list(x) for x in ['ACT', 'ACT', 'ACC', 'ACTC', 'ACT', 'ACT', 'CCT',
         'CCC', 'AAT', 'CT', 'AT', 'CT', 'CT', 'CT', 'CT', 'CT', 'CT',
         'ACT', 'ACT', 'CT', 'ACT', 'CT', 'CT', 'CT', 'CT']]
@@ -1453,11 +1417,10 @@
                                      max_iterations=5)
 
     total_improvement = history.total_improvement[-1]
-    assert_equal(round(total_improvement, 4), 44.0208)
+    assert round(total_improvement, 4) == 44.0208
 
 
-@with_setup(setup, teardown)
-def test_hmm_bw_fit_w_edge_a_distribution_inertia():
+def test_hmm_bw_fit_w_edge_a_distribution_inertia(model):
     seqs = [list(x) for x in ['ACT', 'ACT', 'ACC', 'ACTC', 'ACT', 'ACT', 'CCT',
         'CCC', 'AAT', 'CT', 'AT', 'CT', 'CT', 'CT', 'CT', 'CT', 'CT',
         'ACT', 'ACT', 'CT', 'ACT', 'CT', 'CT', 'CT', 'CT']]
@@ -1471,11 +1434,10 @@
                                      max_iterations=5)
 
     total_improvement = history.total_improvement[-1]
-    assert_equal(round(total_improvement, 4), 81.5447)
+    assert round(total_improvement, 4) == 81.5447
 
 
-@with_setup(setup, teardown)
-def test_hmm_bw_fit_parallel():
+def test_hmm_bw_fit_parallel(model):
     seqs = [list(x) for x in ['ACT', 'ACT', 'ACC', 'ACTC', 'ACT', 'ACT', 'CCT',
         'CCC', 'AAT', 'CT', 'AT', 'CT', 'CT', 'CT', 'CT', 'CT', 'CT',
         'ACT', 'ACT', 'CT', 'ACT', 'CT', 'CT', 'CT', 'CT']]
@@ -1489,11 +1451,10 @@
                                      n_jobs=2)
 
     total_improvement = history.total_improvement[-1]
-    assert_equal(round(total_improvement, 4), 83.1132)
+    assert round(total_improvement, 4) == 83.1132
 
 
-@with_setup(setup, teardown)
-def test_hmm_bw_fit_no_pseudocount_parallel():
+def test_hmm_bw_fit_no_pseudocount_parallel(model):
     seqs = [list(x) for x in ['ACT', 'ACT', 'ACC', 'ACTC', 'ACT', 'ACT', 'CCT',
         'CCC', 'AAT', 'CT', 'AT', 'CT', 'CT', 'CT', 'CT', 'CT', 'CT',
         'ACT', 'ACT', 'CT', 'ACT', 'CT', 'CT', 'CT', 'CT']]
@@ -1507,11 +1468,10 @@
                                      n_jobs=2)
 
     total_improvement = history.total_improvement[-1]
-    assert_equal(round(total_improvement, 4), 85.681)
+    assert round(total_improvement, 4) == 85.681
 
 
-@with_setup(setup, teardown)
-def test_hmm_bw_fit_w_pseudocount_parallel():
+def test_hmm_bw_fit_w_pseudocount_parallel(model):
     seqs = [list(x) for x in ['ACT', 'ACT', 'ACC', 'ACTC', 'ACT', 'ACT', 'CCT',
         'CCC', 'AAT', 'CT', 'AT', 'CT', 'CT', 'CT', 'CT', 'CT', 'CT',
         'ACT', 'ACT', 'CT', 'ACT', 'CT', 'CT', 'CT', 'CT']]
@@ -1525,11 +1485,10 @@
                                      n_jobs=2)
 
     total_improvement = history.total_improvement[-1]
-    assert_equal(round(total_improvement, 4), 84.9408)
+    assert round(total_improvement, 4) == 84.9408
 
 
-@with_setup(setup, teardown)
-def test_hmm_bw_fit_w_pseudocount_priors_parallel():
+def test_hmm_bw_fit_w_pseudocount_priors_parallel(model):
     seqs = [list(x) for x in ['ACT', 'ACT', 'ACC', 'ACTC', 'ACT', 'ACT', 'CCT',
         'CCC', 'AAT', 'CT', 'AT', 'CT', 'CT', 'CT', 'CT', 'CT', 'CT',
         'ACT', 'ACT', 'CT', 'ACT', 'CT', 'CT', 'CT', 'CT']]
@@ -1544,11 +1503,10 @@
                                      n_jobs=2)
 
     total_improvement = history.total_improvement[-1]
-    assert_equal(round(total_improvement, 4), 81.2265)
+    assert round(total_improvement, 4) == 81.2265
 
 
-@with_setup(setup, teardown)
-def test_hmm_bw_fit_w_inertia_parallel():
+def test_hmm_bw_fit_w_inertia_parallel(model):
     seqs = [list(x) for x in ['ACT', 'ACT', 'ACC', 'ACTC', 'ACT', 'ACT', 'CCT',
         'CCC', 'AAT', 'CT', 'AT', 'CT', 'CT', 'CT', 'CT', 'CT', 'CT',
         'ACT', 'ACT', 'CT', 'ACT', 'CT', 'CT', 'CT', 'CT']]
@@ -1562,11 +1520,10 @@
                                      n_jobs=2)
 
     total_improvement = history.total_improvement[-1]
-    assert_equal(round(total_improvement, 4), 85.0528)
+    assert round(total_improvement, 4) == 85.0528
 
 
-@with_setup(setup, teardown)
-def test_hmm_bw_fit_w_inertia2_parallel():
+def test_hmm_bw_fit_w_inertia2_parallel(model):
     seqs = [list(x) for x in ['ACT', 'ACT', 'ACC', 'ACTC', 'ACT', 'ACT', 'CCT',
         'CCC', 'AAT', 'CT', 'AT', 'CT', 'CT', 'CT', 'CT', 'CT', 'CT',
         'ACT', 'ACT', 'CT', 'ACT', 'CT', 'CT', 'CT', 'CT']]
@@ -1580,11 +1537,10 @@
                                      n_jobs=2)
 
     total_improvement = history.total_improvement[-1]
-    assert_equal(round(total_improvement, 4), 72.5134)
+    assert round(total_improvement, 4) == 72.5134
 
 
-@with_setup(setup, teardown)
-def test_hmm_bw_fit_w_pseudocount_inertia_parallel():
+def test_hmm_bw_fit_w_pseudocount_inertia_parallel(model):
     seqs = [list(x) for x in ['ACT', 'ACT', 'ACC', 'ACTC', 'ACT', 'ACT', 'CCT',
         'CCC', 'AAT', 'CT', 'AT', 'CT', 'CT', 'CT', 'CT', 'CT', 'CT',
         'ACT', 'ACT', 'CT', 'ACT', 'CT', 'CT', 'CT', 'CT']]
@@ -1599,11 +1555,10 @@
                                      n_jobs=2)
 
     total_improvement = history.total_improvement[-1]
-    assert_equal(round(total_improvement, 4), 83.0764)
+    assert round(total_improvement, 4) == 83.0764
 
 
-@with_setup(setup, teardown)
-def test_hmm_bw_fit_w_frozen_distributions_parallel():
+def test_hmm_bw_fit_w_frozen_distributions_parallel(model):
     seqs = [list(x) for x in ['ACT', 'ACT', 'ACC', 'ACTC', 'ACT', 'ACT', 'CCT',
         'CCC', 'AAT', 'CT', 'AT', 'CT', 'CT', 'CT', 'CT', 'CT', 'CT',
         'ACT', 'ACT', 'CT', 'ACT', 'CT', 'CT', 'CT', 'CT']]
@@ -1617,11 +1572,10 @@
                                      n_jobs=2)
 
     total_improvement = history.total_improvement[-1]
-    assert_equal(round(total_improvement, 4), 64.474)
+    assert round(total_improvement, 4) == 64.474
 
 
-@with_setup(setup, teardown)
-def test_hmm_bw_fit_w_frozen_edges_parallel():
+def test_hmm_bw_fit_w_frozen_edges_parallel(model):
     seqs = [list(x) for x in ['ACT', 'ACT', 'ACC', 'ACTC', 'ACT', 'ACT', 'CCT',
         'CCC', 'AAT', 'CT', 'AT', 'CT', 'CT', 'CT', 'CT', 'CT', 'CT',
         'ACT', 'ACT', 'CT', 'ACT', 'CT', 'CT', 'CT', 'CT']]
@@ -1635,11 +1589,10 @@
                                      n_jobs=2)
 
     total_improvement = history.total_improvement[-1]
-    assert_equal(round(total_improvement, 4), 44.0208)
+    assert round(total_improvement, 4) == 44.0208
 
 
-@with_setup(setup, teardown)
-def test_hmm_bw_fit_w_edge_a_distribution_inertia():
+def test_hmm_bw_fit_w_edge_a_distribution_inertia_parallel(model):
     seqs = [list(x) for x in ['ACT', 'ACT', 'ACC', 'ACTC', 'ACT', 'ACT', 'CCT',
         'CCC', 'AAT', 'CT', 'AT', 'CT', 'CT', 'CT', 'CT', 'CT', 'CT',
         'ACT', 'ACT', 'CT', 'ACT', 'CT', 'CT', 'CT', 'CT']]
@@ -1654,10 +1607,9 @@
                                      n_jobs=2)
 
     total_improvement = history.total_improvement[-1]
-    assert_equal(round(total_improvement, 4), 81.5447)
+    assert round(total_improvement, 4) == 81.5447
 
-@with_setup(setup, teardown)
-def test_hmm_bw_fit_one_check_input():
+def test_hmm_bw_fit_one_check_input(model):
     seqs = [list(x) for x in ['ACT', 'ACT', 'ACC', 'ACTC', 'ACT', 'ACT', 'CCT',
         'CCC', 'AAT', 'CT', 'AT', 'CT', 'CT', 'CT', 'CT', 'CT', 'CT',
         'ACT', 'ACT', 'CT', 'ACT', 'CT', 'CT', 'CT', 'CT']]
@@ -1671,11 +1623,11 @@
                                      multiple_check_input=False)
 
     total_improvement = history.total_improvement[-1]
-    assert_equal(round(total_improvement, 4), 83.1132)
+    assert round(total_improvement, 4) == 83.1132
 
 def test_hmm_initialization():
     hmmd1 = HiddenMarkovModel()
-    assert_equal(hmmd1.d, 0)
+    assert hmmd1.d == 0
 
 
 def test_hmm_univariate_initialization():
@@ -1691,10 +1643,10 @@
     hmmd1.add_transition(s3d1, s1d1, 0.5)
     hmmd1.add_transition(s3d1, s2d1, 0.5)
 
-    assert_equal(hmmd1.d, 0)
+    assert hmmd1.d == 0
 
     hmmd1.bake()
-    assert_equal(hmmd1.d, 1)
+    assert hmmd1.d == 1
 
 
 def test_hmm_multivariate_initialization():
@@ -1703,7 +1655,7 @@
     s3d3 = State(IndependentComponentsDistribution([UniformDistribution(0, 10), UniformDistribution(0, 10), UniformDistribution(0, 10)]))
 
     hmmd3 = HiddenMarkovModel()
-    assert_equal(hmmd3.d, 0)
+    assert hmmd3.d == 0
 
     hmmd3.add_transition(hmmd3.start, s1d3, 0.5)
     hmmd3.add_transition(hmmd3.start, s2d3, 0.5)
@@ -1711,10 +1663,10 @@
     hmmd3.add_transition(s2d3, s3d3, 1)
     hmmd3.add_transition(s3d3, s1d3, 0.5)
     hmmd3.add_transition(s3d3, s2d3, 0.5)
-    assert_equal(hmmd3.d, 0)
+    assert hmmd3.d == 0
 
     hmmd3.bake()
-    assert_equal(hmmd3.d, 3)
+    assert hmmd3.d == 3
 
 
 def test_hmm_initialization_error():
@@ -1729,11 +1681,12 @@
     hmmb.add_transition(sbd3, sbd1, 0.5)
     hmmb.add_transition(sbd3, sbd3, 0.5)
 
-    assert_raises(ValueError, hmmb.bake)
+    with pytest.raises(ValueError):
+        hmmb.bake()
 
 
-@with_setup(setup_univariate_gaussian_dense)
-def test_hmm_pickle_univariate():
+def test_hmm_pickle_univariate(univariate_gaussian_dense):
+    model = univariate_gaussian_dense
     model2 = pickle.loads(pickle.dumps(model))
 
     random_state = numpy.random.RandomState(0)
@@ -1746,8 +1699,8 @@
         assert_almost_equal(logp1, logp2)
 
 
-@with_setup(setup_univariate_gaussian_dense)
-def test_hmm_json_univariate():
+def test_hmm_json_univariate(univariate_gaussian_dense):
+    model = univariate_gaussian_dense
     model2 = HiddenMarkovModel.from_json(model.to_json())
 
     random_state = numpy.random.RandomState(0)
@@ -1759,8 +1712,8 @@
 
         assert_almost_equal(logp1, logp2)
 
-@with_setup(setup_univariate_gaussian_dense)
-def test_hmm_robust_from_json_univariate():
+def test_hmm_robust_from_json_univariate(univariate_gaussian_dense):
+    model = univariate_gaussian_dense
     model2 = from_json(model.to_json())
 
     random_state = numpy.random.RandomState(0)
@@ -1772,8 +1725,8 @@
 
         assert_almost_equal(logp1, logp2)
 
-@with_setup(setup_multivariate_gaussian_dense)
-def test_hmm_pickle_multivariate():
+def test_hmm_pickle_multivariate(multivariate_gaussian_dense):
+    model = multivariate_gaussian_dense
     model2 = pickle.loads(pickle.dumps(model))
 
     random_state = numpy.random.RandomState(0)
@@ -1786,8 +1739,8 @@
         assert_almost_equal(logp1, logp2)
 
 
-@with_setup(setup_multivariate_gaussian_dense)
-def test_hmm_json_multivariate():
+def test_hmm_json_multivariate(multivariate_gaussian_dense):
+    model = multivariate_gaussian_dense
     model2 = HiddenMarkovModel.from_json(model.to_json())
 
     random_state = numpy.random.RandomState(0)
@@ -1799,8 +1752,8 @@
 
         assert_almost_equal(logp1, logp2)
 
-@with_setup(setup_multivariate_gaussian_dense)
-def test_hmm_robust_from_json_multivariate():
+def test_hmm_robust_from_json_multivariate(multivariate_gaussian_dense):
+    model = multivariate_gaussian_dense
     model2 = from_json(model.to_json())
 
     random_state = numpy.random.RandomState(0)
@@ -1812,19 +1765,19 @@
 
         assert_almost_equal(logp1, logp2)
 
-@with_setup(setup_univariate_discrete_dense, teardown)
-def test_hmm_univariate_discrete_from_samples():
+def test_hmm_univariate_discrete_from_samples(univariate_discrete_dense):
+    model = univariate_discrete_dense
     X = [model.sample(random_state=0) for i in range(25)]
     model2 = HiddenMarkovModel.from_samples(DiscreteDistribution, 4, X, max_iterations=25)
 
     logp1 = sum(map(model.log_probability, X))
     logp2 = sum(map(model2.log_probability, X))
 
-    assert_greater(logp2, logp1)
+    assert logp2 > logp1
 
 
-@with_setup(setup_univariate_discrete_dense, teardown)
-def test_hmm_univariate_discrete_from_samples_one_check_input():
+def test_hmm_univariate_discrete_from_samples_one_check_input(univariate_discrete_dense):
+    model = univariate_discrete_dense
     X = [model.sample(random_state=0) for i in range(25)]
     model2 = HiddenMarkovModel.from_samples(DiscreteDistribution, 4, X, 
                                             max_iterations=25,
@@ -1833,53 +1786,53 @@
     logp1 = sum(map(model.log_probability, X))
     logp2 = sum(map(model2.log_probability, X))
 
-    assert_greater(logp2, logp1)
+    assert logp2 > logp1
 
-@with_setup(setup_univariate_gaussian_dense, teardown)
-def test_hmm_univariate_gaussian_from_samples():
+def test_hmm_univariate_gaussian_from_samples(univariate_gaussian_dense):
+    model = univariate_gaussian_dense
     X = model.sample(n=25, random_state=0)
     model2 = HiddenMarkovModel.from_samples(NormalDistribution, 4, X, max_iterations=25)
 
     logp1 = sum(map(model.log_probability, X))
     logp2 = sum(map(model2.log_probability, X))
 
-    assert_greater(logp2, logp1)
+    assert logp2 > logp1
 
 
-@with_setup(setup_multivariate_gaussian_dense, teardown)
-def test_hmm_multivariate_gaussian_from_samples():
+def test_hmm_multivariate_gaussian_from_samples(multivariate_gaussian_dense):
+    model = multivariate_gaussian_dense
     X = model.sample(n=25, random_state=0)
     model2 = HiddenMarkovModel.from_samples(MultivariateGaussianDistribution, 4, X, max_iterations=25)
 
     logp1 = sum(map(model.log_probability, X))
     logp2 = sum(map(model2.log_probability, X))
 
-    assert_greater(logp2, logp1)
+    assert logp2 > logp1
 
-@with_setup(setup_univariate_discrete_dense, teardown)
-def test_hmm_univariate_discrete_from_samples_end_state():
+def test_hmm_univariate_discrete_from_samples_end_state(univariate_discrete_dense):
+    model = univariate_discrete_dense
     X = model.sample(n=25, random_state=0)
     model2 = HiddenMarkovModel.from_samples(DiscreteDistribution, 4, X, max_iterations=25, end_state=True)
 
     #We get non-zero end probabilities for each state
-    assert_greater(model2.dense_transition_matrix()[0][model2.end_index],0)
-    assert_greater(model2.dense_transition_matrix()[1][model2.end_index],0)
-    assert_greater(model2.dense_transition_matrix()[2][model2.end_index],0)
-    assert_greater(model2.dense_transition_matrix()[3][model2.end_index],0)
+    assert model2.dense_transition_matrix()[0][model2.end_index] > 0
+    assert model2.dense_transition_matrix()[1][model2.end_index] > 0
+    assert model2.dense_transition_matrix()[2][model2.end_index] > 0
+    assert model2.dense_transition_matrix()[3][model2.end_index] > 0
 
-@with_setup(setup_univariate_discrete_dense, teardown)
-def test_hmm_univariate_discrete_from_samples_no_end_state():
+def test_hmm_univariate_discrete_from_samples_no_end_state(univariate_discrete_dense):
+    model = univariate_discrete_dense
     X = [model.sample(random_state=0) for i in range(25)]
     model2 = HiddenMarkovModel.from_samples(DiscreteDistribution, 4, X, max_iterations=25, end_state=False)
 
     #We don't have end probabilities for each state
-    assert_equal(model2.dense_transition_matrix()[0][model2.end_index],0)
-    assert_equal(model2.dense_transition_matrix()[1][model2.end_index],0)
-    assert_equal(model2.dense_transition_matrix()[2][model2.end_index],0)
-    assert_equal(model2.dense_transition_matrix()[3][model2.end_index],0)
+    assert model2.dense_transition_matrix()[0][model2.end_index] == 0
+    assert model2.dense_transition_matrix()[1][model2.end_index] == 0
+    assert model2.dense_transition_matrix()[2][model2.end_index] == 0
+    assert model2.dense_transition_matrix()[3][model2.end_index] == 0
 
-@with_setup(setup_general_mixture_gaussian, teardown)
-def test_hmm_json_general_mixture_gaussian():
+def test_hmm_json_general_mixture_gaussian(general_mixture_gaussian):
+    model = general_mixture_gaussian
     model2 = HiddenMarkovModel.from_json(model.to_json())
     random_state = numpy.random.RandomState(0)
     for i in range(10):
@@ -1890,8 +1843,8 @@
 
         assert_almost_equal(logp1, logp2)
 
-@with_setup(setup_general_mixture_gaussian, teardown)
-def test_hmm_robust_from_json_general_mixture_gaussian():
+def test_hmm_robust_from_json_general_mixture_gaussian(general_mixture_gaussian):
+    model = general_mixture_gaussian
     model2 = from_json(model.to_json())
     random_state = numpy.random.RandomState(0)
     for i in range(10):
--- python-pomegranate.orig/tests/test_io.py
+++ python-pomegranate/tests/test_io.py
@@ -3,17 +3,13 @@
 from pomegranate.io import SequenceGenerator
 from pomegranate.io import DataFrameGenerator
 
-from .tools import with_setup
-from .tools import assert_true
-from .tools import assert_equal
-from .tools import assert_raises
 from numpy.testing import assert_almost_equal
 from numpy.testing import assert_array_equal
-from numpy.testing import assert_array_almost_equal
 
 import random
 import numpy
 import pandas
+import pytest
 
 numpy.random.seed(0)
 random.seed(0)
@@ -30,7 +26,9 @@
 	X = numpy.random.randn(500, 13)
 	data = DataGenerator(X)
 
-	assert_raises(ValueError, lambda data: data.classes, data)
+	with pytest.raises(ValueError):
+		# accessing .classes on unlabeled data must raise
+		data.classes
 
 def test_io_datagenerator_classes():
 	X = numpy.random.randn(500, 13)
@@ -146,7 +144,7 @@
 	X_ = numpy.concatenate([batch[0] for batch in data.unlabeled_batches()])
 	w_ = numpy.concatenate([batch[1] for batch in data.unlabeled_batches()])
 
-	assert_true(X.shape[0] > X_.shape[0])
+	assert X.shape[0] > X_.shape[0]
 	assert_almost_equal(X[y == -1], X_)
 	assert_almost_equal(w[y == -1], w_)
 
@@ -160,7 +158,7 @@
 	y_ = numpy.concatenate([batch[1] for batch in data.labeled_batches()])
 	w_ = numpy.concatenate([batch[2] for batch in data.labeled_batches()])
 
-	assert_true(X.shape[0] > X_.shape[0])
+	assert X.shape[0] > X_.shape[0]
 	assert_almost_equal(X[y != -1], X_)
 	assert_almost_equal(y[y != -1], y_)
 	assert_almost_equal(w[y != -1], w_)
@@ -278,8 +276,8 @@
 	X_ = numpy.concatenate([batch[0] for batch in data.unlabeled_batches()])
 	w_ = numpy.concatenate([batch[1] for batch in data.unlabeled_batches()])
 
-	assert_true(len(X) > len(X_))
-	assert_true(len(w) > len(w_))
+	assert len(X) > len(X_)
+	assert len(w) > len(w_)
 
 	i = 0
 	for j in range(500):
@@ -301,8 +299,8 @@
 	X_ = numpy.concatenate([batch[0] for batch in data.unlabeled_batches()])
 	w_ = numpy.concatenate([batch[1] for batch in data.unlabeled_batches()])
 
-	assert_true(len(X) > len(X_))
-	assert_true(len(w) > len(w_))
+	assert len(X) > len(X_)
+	assert len(w) > len(w_)
 
 	i = 0
 	for j in range(500):
@@ -325,9 +323,9 @@
 	w_ = numpy.concatenate([batch[1] for batch in data.labeled_batches()])
 	y_ = numpy.concatenate([batch[2] for batch in data.labeled_batches()])
 
-	assert_true(len(X) > len(X_))
-	assert_true(len(w) > len(w_))
-	assert_true(len(y) > len(y_))
+	assert len(X) > len(X_)
+	assert len(w) > len(w_)
+	assert len(y) > len(y_)
 
 	i = 0
 	for j in range(500):
@@ -351,9 +349,9 @@
 	w_ = numpy.concatenate([batch[1] for batch in data.labeled_batches()])
 	y_ = numpy.concatenate([batch[2] for batch in data.labeled_batches()])
 
-	assert_true(len(X) > len(X_))
-	assert_true(len(w) > len(w_))
-	assert_true(len(y) > len(y_))
+	assert len(X) > len(X_)
+	assert len(w) > len(w_)
+	assert len(y) > len(y_)
 
 	i = 0
 	for j in range(500):
@@ -373,7 +371,9 @@
 	X = pandas.DataFrame(numpy.random.randn(500, 13))
 	data = DataFrameGenerator(X)
 
-	assert_raises(ValueError, lambda data: data.classes, data)
+	with pytest.raises(ValueError):
+		# accessing .classes on unlabeled data must raise
+		data.classes
 
 def test_io_dfgenerator_numpy_classes():
 	X = pandas.DataFrame(numpy.random.randn(500, 13))
@@ -627,7 +627,7 @@
 	X_ = numpy.concatenate([batch[0] for batch in data.unlabeled_batches()])
 	w_ = numpy.concatenate([batch[1] for batch in data.unlabeled_batches()])
 
-	assert_true(X.shape[0] > X_.shape[0])
+	assert X.shape[0] > X_.shape[0]
 	assert_almost_equal(X.loc[y == -1], X_)
 	assert_almost_equal(w[y == -1], w_)
 
@@ -644,7 +644,7 @@
 	X_ = numpy.concatenate([batch[0] for batch in data.unlabeled_batches()])
 	w_ = numpy.concatenate([batch[1] for batch in data.unlabeled_batches()])
 
-	assert_true(X.shape[0] > X_.shape[0])
+	assert X.shape[0] > X_.shape[0]
 	assert_almost_equal(X2.loc[y == -1], X_)
 	assert_almost_equal(w[y == -1], w_)
 
@@ -658,7 +658,7 @@
 	y_ = numpy.concatenate([batch[1] for batch in data.labeled_batches()])
 	w_ = numpy.concatenate([batch[2] for batch in data.labeled_batches()])
 
-	assert_true(X.shape[0] > X_.shape[0])
+	assert X.shape[0] > X_.shape[0]
 	assert_almost_equal(X.loc[y != -1], X_)
 	assert_almost_equal(y[y != -1], y_)
 	assert_almost_equal(w[y != -1], w_)
@@ -677,7 +677,7 @@
 	y_ = numpy.concatenate([batch[1] for batch in data.labeled_batches()])
 	w_ = numpy.concatenate([batch[2] for batch in data.labeled_batches()])
 
-	assert_true(X.shape[0] > X_.shape[0])
+	assert X.shape[0] > X_.shape[0]
 	assert_almost_equal(X2.loc[y != -1], X_)
 	assert_almost_equal(y[y != -1], y_)
 	assert_almost_equal(w[y != -1], w_)
--- python-pomegranate.orig/tests/test_kmeans.py
+++ python-pomegranate/tests/test_kmeans.py
@@ -1,24 +1,17 @@
 from pomegranate import *
-from .tools import with_setup
-from .tools import assert_true
-from .tools import assert_equal
-from .tools import assert_greater_equal
-from .tools import assert_greater
-from .tools import assert_raises
-from .tools import assert_not_equal
-from numpy.testing import assert_almost_equal
 from numpy.testing import assert_array_almost_equal
 from numpy.testing import assert_array_equal
-import random
-import pickle
-import numpy as np
+import numpy
+
+import pytest
 
 numpy.random.seed(0)
 
-def setup_three_dimensions():
-	global X
-	X = numpy.array([[-0.13174492,  0.51895916, -1.13141796],
-		 [ 7.92260379,  7.86325294,  7.9884075 ],
+
+@pytest.fixture
+def three_dimensions():
+    X = numpy.array([[-0.13174492,  0.51895916, -1.13141796],
+    	 [ 7.92260379,  7.86325294,  7.9884075 ],
          [-0.63378039, -0.96394236, -1.34125012],
          [ 8.16216236,  8.04655182,  6.68825619],
          [-0.69595565, -0.19004012,  0.40768949],
@@ -28,37 +21,34 @@
          [-0.97493454, -0.04714556, -0.38607725],
          [ 9.65781658,  7.04832845,  6.47613347]])
 
-	idxs = numpy.array([29, 19, 26, 11,  8, 27, 21,  7, 14, 13])
-	i, j = idxs // 3, idxs % 3
-
-	global X_nan
-	X_nan = X.copy()
-	X_nan[i, j] = numpy.nan
+    idxs = numpy.array([29, 19, 26, 11,  8, 27, 21,  7, 14, 13])
+    i, j = idxs // 3, idxs % 3
 
+    X_nan = X.copy()
+    X_nan[i, j] = numpy.nan
 
-	global centroids
-	centroids = numpy.array([[0, 0, 0],
-							 [8, 8, 8]])
+    centroids = numpy.array([[0, 0, 0],
+    						 [8, 8, 8]])
 
-	global model
-	model = Kmeans(2, centroids)
+    model = Kmeans(2, centroids)
+    return X, X_nan, centroids, model
 
 
-def setup_five_dimensions():
-	global X
-	X = numpy.array([[-0.04320239,  2.25402395, -0.3075753 ,  0.01710706,  2.88816037],
-	      [ 3.6483074 ,  5.03958367,  3.14457941,  4.94180558,  4.32880698],
-	      [ 7.48485345,  8.54100011,  7.90936486,  8.12260819,  6.6466098 ],
-	      [ 12.15394848,  10.52091121,  13.55495735,  10.48190106, 10.94417476],
+@pytest.fixture
+def five_dimensions():
+    X = numpy.array([[-0.04320239,  2.25402395, -0.3075753 ,  0.01710706,  2.88816037],
+          [ 3.6483074 ,  5.03958367,  3.14457941,  4.94180558,  4.32880698],
+          [ 7.48485345,  8.54100011,  7.90936486,  8.12260819,  6.6466098 ],
+          [ 12.15394848,  10.52091121,  13.55495735,  10.48190106, 10.94417476],
           [ 1.21068778,  0.77311369, -0.31479566, -0.51865649,  0.4408653 ],
           [-0.62796182, -0.34947675, -1.09050772, -0.34591408,  0.78866514],
           [ 0.5661847 ,  0.30785453,  0.38823634,  1.99717206, -0.99415221],
           [ 0.10871016,  2.06244903, -0.19580087, -0.22100353, -0.43777027],
-	      [ 3.06987578,  4.8633418 ,  4.23645519,  4.20563589,  3.40046883],
+          [ 3.06987578,  4.8633418 ,  4.23645519,  4.20563589,  3.40046883],
           [ 3.0471144 ,  3.43070459,  3.88690894,  3.61962816,  3.52399965],
           [ 3.3020318 ,  5.16491752,  3.85249134,  2.7075964 ,  4.03831846],
           [ 3.55266908,  2.69803949,  4.13340743,  5.72527752,  4.9840009 ],
-	      [ 7.27689336,  8.99614296,  7.10109146,  7.81354687,  7.27320546],
+          [ 7.27689336,  8.99614296,  7.10109146,  7.81354687,  7.27320546],
           [ 9.55443921,  7.70358635,  8.9762396 ,  7.8054752 ,  7.95933534],
           [ 7.55150108,  9.09523173,  8.38379803,  8.18932292,  7.70853   ],
           [ 9.59329137,  8.26811547,  9.82226673,  8.35257773,  8.21768809],
@@ -67,358 +57,360 @@
           [ 11.41978669,  11.45646564,  11.77622614,  11.96590564, 12.33083825],
           [ 12.13323296,  11.89683824,  12.18373541,  13.21432431, 11.79987739]])
 
-	idxs = numpy.array([77, 26, 61, 46, 18, 30, 94, 96, 45, 67,  4, 20, 23, 73, 37, 21, 58,
+    idxs = numpy.array([77, 26, 61, 46, 18, 30, 94, 96, 45, 67,  4, 20, 23, 73, 37, 21, 58,
        99, 51,  7, 69, 53, 81, 85, 95,  9, 98, 24, 28, 38])
-	i, j = idxs // 5, idxs % 5
+    i, j = idxs // 5, idxs % 5
 
-	global X_nan
-	X_nan = X.copy()
-	X_nan[i, j] = numpy.nan
-
-	global centroids
-	centroids = numpy.array([[0, 0, 0, 0, 0],
-							 [4, 4, 4, 4, 4],
-							 [8, 8, 8, 8, 8],
-							 [12, 12, 12, 12, 12]])
+    X_nan = X.copy()
+    X_nan[i, j] = numpy.nan
 
-	global model
-	model = Kmeans(4, centroids)
+    centroids = numpy.array([[0, 0, 0, 0, 0],
+    						 [4, 4, 4, 4, 4],
+    						 [8, 8, 8, 8, 8],
+    						 [12, 12, 12, 12, 12]])
+
+    model = Kmeans(4, centroids)
+    return X, X_nan, centroids, model
 
 
 def test_kmeans_init():
-	centroids = [[2, 3], [5, 7]]
-	model = Kmeans(2, centroids)
-	assert_equal(model.d, 2)
-	assert_equal(model.k, 2)
-	assert_array_equal(model.centroids, centroids)
+    centroids = [[2, 3], [5, 7]]
+    model = Kmeans(2, centroids)
+    assert model.d == 2
+    assert model.k == 2
+    assert_array_equal(model.centroids, centroids)
 
 
-@with_setup(setup_three_dimensions)
-def test_kmeans_from_samples():
-	model = Kmeans.from_samples(2, X, init='first-k')
-	centroids = [[-0.872246, -0.344245, -0.578309],
+def test_kmeans_from_samples(three_dimensions):
+    X, X_nan, centroids, model = three_dimensions
+    model = Kmeans.from_samples(2, X, init='first-k')
+    centroids = [[-0.872246, -0.344245, -0.578309],
       			 [ 8.282911,  7.825455,  7.069913]]
 
-	assert_array_almost_equal(model.centroids, centroids)
+    assert_array_almost_equal(model.centroids, centroids)
 
 
-@with_setup(setup_three_dimensions)
-def test_kmeans_from_samples_parallel():
-	model = Kmeans.from_samples(2, X, init='first-k', n_jobs=2)
-	centroids = [[-0.872246, -0.344245, -0.578309],
+def test_kmeans_from_samples_parallel(three_dimensions):
+    X, X_nan, centroids, model = three_dimensions
+    model = Kmeans.from_samples(2, X, init='first-k', n_jobs=2)
+    centroids = [[-0.872246, -0.344245, -0.578309],
       			 [ 8.282911,  7.825455,  7.069913]]
 
-	assert_array_almost_equal(model.centroids, centroids)
+    assert_array_almost_equal(model.centroids, centroids)
+
 
+def test_kmeans_predict(three_dimensions):
+    X, X_nan, centroids, model = three_dimensions
+    y = numpy.array([0, 1, 0, 1, 0, 1, 0, 1, 0, 1])
+    y_hat = model.predict(X)
+    assert_array_equal(y, y_hat)
 
-@with_setup(setup_three_dimensions)
-def test_kmeans_predict():
-	y = numpy.array([0, 1, 0, 1, 0, 1, 0, 1, 0, 1])
-	y_hat = model.predict(X)
-	assert_array_equal(y, y_hat)
 
+def test_kmeans_predict_parallel(three_dimensions):
+    X, X_nan, centroids, model = three_dimensions
+    y = numpy.array([0, 1, 0, 1, 0, 1, 0, 1, 0, 1])
+    y_hat = model.predict(X, n_jobs=2)
+    assert_array_equal(y, y_hat)
 
-@with_setup(setup_three_dimensions)
-def test_kmeans_predict_parallel():
-	y = numpy.array([0, 1, 0, 1, 0, 1, 0, 1, 0, 1])
-	y_hat = model.predict(X, n_jobs=2)
-	assert_array_equal(y, y_hat)
+    y_hat = model.predict(X, n_jobs=4)
+    assert_array_equal(y, y_hat)
 
-	y_hat = model.predict(X, n_jobs=4)
-	assert_array_equal(y, y_hat)
 
+def test_kmeans_predict_large(five_dimensions):
+    X, X_nan, centroids, model = five_dimensions
+    y = [0, 1, 2, 3, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3]
+    y_hat = model.predict(X)
+    assert_array_equal(y, y_hat)
 
-@with_setup(setup_five_dimensions)
-def test_kmeans_predict_large():
-	y = [0, 1, 2, 3, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3]
-	y_hat = model.predict(X)
-	assert_array_equal(y, y_hat)
 
+def test_kmeans_predict_large_parallel(five_dimensions):
+    X, X_nan, centroids, model = five_dimensions
+    y = [0, 1, 2, 3, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3]
+    y_hat = model.predict(X, n_jobs=2)
+    assert_array_equal(y, y_hat)
 
-@with_setup(setup_five_dimensions)
-def test_kmeans_predict_large_parallel():
-	y = [0, 1, 2, 3, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3]
-	y_hat = model.predict(X, n_jobs=2)
-	assert_array_equal(y, y_hat)
+    y_hat = model.predict(X, n_jobs=4)
+    assert_array_equal(y, y_hat)
 
-	y_hat = model.predict(X, n_jobs=4)
-	assert_array_equal(y, y_hat)
 
+def test_kmeans_fit(three_dimensions):
+    X, X_nan, centroids, model = three_dimensions
+    model.fit(X)
 
-@with_setup(setup_three_dimensions)
-def test_kmeans_fit():
-	model.fit(X)
+    centroids = [[-0.872246, -0.344245, -0.578309],
+    			 [ 8.282911,  7.825455,  7.069913]]
 
-	centroids = [[-0.872246, -0.344245, -0.578309],
-       			 [ 8.282911,  7.825455,  7.069913]]
+    assert_array_almost_equal(model.centroids, centroids)
 
-	assert_array_almost_equal(model.centroids, centroids)
 
+def test_kmeans_fit_parallel(three_dimensions):
+    X, X_nan, centroids, model = three_dimensions
+    model.fit(X, n_jobs=2)
 
-@with_setup(setup_three_dimensions)
-def test_kmeans_fit_parallel():
-	model.fit(X, n_jobs=2)
+    centroids = [[-0.872246, -0.344245, -0.578309],
+    			 [ 8.282911,  7.825455,  7.069913]]
 
-	centroids = [[-0.872246, -0.344245, -0.578309],
-       			 [ 8.282911,  7.825455,  7.069913]]
+    assert_array_almost_equal(model.centroids, centroids)
 
-	assert_array_almost_equal(model.centroids, centroids)
+    model.fit(X, n_jobs=4)
 
-	model.fit(X, n_jobs=4)
+    centroids = [[-0.872246, -0.344245, -0.578309],
+    			 [ 8.282911,  7.825455,  7.069913]]
 
-	centroids = [[-0.872246, -0.344245, -0.578309],
-       			 [ 8.282911,  7.825455,  7.069913]]
+    assert_array_almost_equal(model.centroids, centroids)
 
-	assert_array_almost_equal(model.centroids, centroids)
 
+def test_kmeans_multiple_init(five_dimensions):
+    X, X_nan, centroids, model = five_dimensions
+    model1 = Kmeans.from_samples(4, X, init='kmeans++', n_init=1)
+    model2 = Kmeans.from_samples(4, X, init='kmeans++', n_init=25)
 
-@with_setup(setup_five_dimensions)
-def test_kmeans_multiple_init():
-	model1 = Kmeans.from_samples(4, X, init='kmeans++', n_init=1)
-	model2 = Kmeans.from_samples(4, X, init='kmeans++', n_init=25)
+    dist1 = model1.distance(X).min(axis=1).sum()
+    dist2 = model2.distance(X).min(axis=1).sum()
 
-	dist1 = model1.distance(X).min(axis=1).sum()
-	dist2 = model2.distance(X).min(axis=1).sum()
+    assert dist1 >= dist2
 
-	assert_greater_equal(dist1, dist2)
+    model1 = Kmeans.from_samples(4, X, init='first-k', n_init=1)
+    model2 = Kmeans.from_samples(4, X, init='first-k', n_init=5)
 
-	model1 = Kmeans.from_samples(4, X, init='first-k', n_init=1)
-	model2 = Kmeans.from_samples(4, X, init='first-k', n_init=5)
+    dist1 = model1.distance(X).min(axis=1).sum()
+    dist2 = model2.distance(X).min(axis=1).sum()
 
-	dist1 = model1.distance(X).min(axis=1).sum()
-	dist2 = model2.distance(X).min(axis=1).sum()
+    assert dist1 == dist2
 
-	assert_equal(dist1, dist2)
 
+def test_kmeans_ooc_from_samples(five_dimensions):
+    X, X_nan, centroids, model = five_dimensions
+    numpy.random.seed(0)
 
-@with_setup(setup_five_dimensions)
-def test_kmeans_ooc_from_samples():
-	numpy.random.seed(0)
+    model1 = Kmeans.from_samples(5, X, init='first-k', batch_size=20)
+    model2 = Kmeans.from_samples(5, X, init='first-k', batch_size=None)
 
-	model1 = Kmeans.from_samples(5, X, init='first-k', batch_size=20)
-	model2 = Kmeans.from_samples(5, X, init='first-k', batch_size=None)
+    assert_array_equal(model1.centroids, model2.centroids)
 
-	assert_array_equal(model1.centroids, model2.centroids)
 
+def test_kmeans_ooc_fit(three_dimensions):
+    X, X_nan, centroids, model = three_dimensions
+    centroids_copy = numpy.copy(centroids)
+    model1 = Kmeans(2, centroids_copy, n_init=1)
+    model1.fit(X)
 
-@with_setup(setup_three_dimensions)
-def test_kmeans_ooc_fit():
-	centroids_copy = numpy.copy(centroids)
-	model1 = Kmeans(2, centroids_copy, n_init=1)
-	model1.fit(X)
+    centroids_copy = numpy.copy(centroids)
+    model2 = Kmeans(2, centroids_copy, n_init=1)
+    model2.fit(X, batch_size=10)
 
-	centroids_copy = numpy.copy(centroids)
-	model2 = Kmeans(2, centroids_copy, n_init=1)
-	model2.fit(X, batch_size=10)
+    centroids_copy = numpy.copy(centroids)
+    model3 = Kmeans(2, centroids_copy, n_init=1)
+    model3.fit(X, batch_size=1)
 
-	centroids_copy = numpy.copy(centroids)
-	model3 = Kmeans(2, centroids_copy, n_init=1)
-	model3.fit(X, batch_size=1)
+    assert_array_almost_equal(model1.centroids, model2.centroids)
+    assert_array_almost_equal(model1.centroids, model3.centroids)
 
-	assert_array_almost_equal(model1.centroids, model2.centroids)
-	assert_array_almost_equal(model1.centroids, model3.centroids)
 
+def test_kmeans_minibatch_from_samples(five_dimensions):
+    X, X_nan, centroids, model = five_dimensions
+    model1 = Kmeans.from_samples(4, X, init='first-k', batch_size=10)
+    model2 = Kmeans.from_samples(4, X, init='first-k', batch_size=None)
+    model3 = Kmeans.from_samples(4, X, init='first-k', batch_size=10, batches_per_epoch=1)
 
-@with_setup(setup_five_dimensions)
-def test_kmeans_minibatch_from_samples():
-	model1 = Kmeans.from_samples(4, X, init='first-k', batch_size=10)
-	model2 = Kmeans.from_samples(4, X, init='first-k', batch_size=None)
-	model3 = Kmeans.from_samples(4, X, init='first-k', batch_size=10, batches_per_epoch=1)
+    assert_array_almost_equal(model1.centroids, model2.centroids)
+    with pytest.raises(AssertionError):
+        assert_array_equal(model1.centroids, model3.centroids)
 
-	assert_array_almost_equal(model1.centroids, model2.centroids)
-	assert_raises(AssertionError, assert_array_equal, model1.centroids, model3.centroids)
 
+def test_kmeans_minibatch_fit(five_dimensions):
+    X, X_nan, centroids, model = five_dimensions
+    centroids_copy = numpy.copy(centroids)
+    model1 = Kmeans(4, centroids_copy)
+    model1.fit(X, batch_size=10)
 
-@with_setup(setup_five_dimensions)
-def test_kmeans_minibatch_fit():
-	centroids_copy = numpy.copy(centroids)
-	model1 = Kmeans(4, centroids_copy)
-	model1.fit(X, batch_size=10)
+    centroids_copy = numpy.copy(centroids)
+    model2 = Kmeans(4, centroids_copy)
+    model2.fit(X, batch_size=None)
 
-	centroids_copy = numpy.copy(centroids)
-	model2 = Kmeans(4, centroids_copy)
-	model2.fit(X, batch_size=None)
+    centroids_copy = numpy.copy(centroids)
+    model3 = Kmeans(4, centroids_copy)
+    model3.fit(X, batch_size=5, batches_per_epoch=1)
 
-	centroids_copy = numpy.copy(centroids)
-	model3 = Kmeans(4, centroids_copy)
-	model3.fit(X, batch_size=5, batches_per_epoch=1)
+    assert_array_almost_equal(model1.centroids, model2.centroids)
+    with pytest.raises(AssertionError):
+        assert_array_equal(model1.centroids, model3.centroids)
 
-	assert_array_almost_equal(model1.centroids, model2.centroids)
-	assert_raises(AssertionError, assert_array_equal, model1.centroids, model3.centroids)
 
+def test_kmeans_nan_from_samples(three_dimensions):
+    X, X_nan, centroids, model = three_dimensions
+    model = Kmeans.from_samples(2, X_nan, init='first-k')
+    centroids = [[-0.872246,  0.235907, -0.785954],
+                 [ 7.94916 ,  7.825455,  7.395059]]
 
-@with_setup(setup_three_dimensions)
-def test_kmeans_nan_from_samples():
-	model = Kmeans.from_samples(2, X_nan, init='first-k')
-	centroids = [[-0.872246,  0.235907, -0.785954],
-      			 [ 7.94916 ,  7.825455,  7.395059]]
+    assert_array_almost_equal(model.centroids, centroids)
 
-	assert_array_almost_equal(model.centroids, centroids)
 
+def test_kmeans_nan_from_samples_parallel(three_dimensions):
+    X, X_nan, centroids, model = three_dimensions
+    model = Kmeans.from_samples(2, X_nan, init='first-k', n_jobs=2)
+    centroids = [[-0.872246,  0.235907, -0.785954],
+                 [ 7.94916 ,  7.825455,  7.395059]]
 
-@with_setup(setup_three_dimensions)
-def test_kmeans_nan_from_samples_parallel():
-	model = Kmeans.from_samples(2, X_nan, init='first-k', n_jobs=2)
-	centroids = [[-0.872246,  0.235907, -0.785954],
-      			 [ 7.94916 ,  7.825455,  7.395059]]
+    assert_array_almost_equal(model.centroids, centroids)
 
-	assert_array_almost_equal(model.centroids, centroids)
 
+def test_kmeans_nan_fit(three_dimensions):
+    X, X_nan, centroids, model = three_dimensions
+    model.fit(X_nan)
 
-@with_setup(setup_three_dimensions)
-def test_kmeans_nan_fit():
-	model.fit(X_nan)
+    centroids = [[-0.872246,  0.235907, -0.785954],
+                 [ 7.94916 ,  7.825455,  7.395059]]
 
-	centroids = [[-0.872246,  0.235907, -0.785954],
-      			 [ 7.94916 ,  7.825455,  7.395059]]
+    assert_array_almost_equal(model.centroids, centroids)
 
-	assert_array_almost_equal(model.centroids, centroids)
 
+def test_kmeans_nan_fit_parallel(three_dimensions):
+    X, X_nan, centroids, model = three_dimensions
+    model.fit(X_nan, n_jobs=2)
 
-@with_setup(setup_three_dimensions)
-def test_kmeans_nan_fit_parallel():
-	model.fit(X_nan, n_jobs=2)
+    centroids = [[-0.872246,  0.235907, -0.785954],
+                 [ 7.94916 ,  7.825455,  7.395059]]
 
-	centroids = [[-0.872246,  0.235907, -0.785954],
-      			 [ 7.94916 ,  7.825455,  7.395059]]
+    assert_array_almost_equal(model.centroids, centroids)
 
-	assert_array_almost_equal(model.centroids, centroids)
 
+def test_kmeans_nan_fit_large(five_dimensions):
+    X, X_nan, centroids, model = five_dimensions
+    model.fit(X_nan)
 
-@with_setup(setup_five_dimensions)
-def test_kmeans_nan_fit_large():
-	model.fit(X_nan)
+    centroids = [[ -0.187485,   1.541443,  -0.331161,   1.00714 ,  -0.214419],
+                 [  3.393221,   4.200322,   4.027316,   4.25569 ,   3.986697],
+                 [  8.292196,   8.401983,   7.798085,   8.023552,   7.461508],
+                 [ 11.782228,  11.711423,  12.625309,  11.727549,  10.917095]]
 
-	centroids = [[ -0.187485,   1.541443,  -0.331161,   1.00714 ,  -0.214419],
-		         [  3.393221,   4.200322,   4.027316,   4.25569 ,   3.986697],
-		         [  8.292196,   8.401983,   7.798085,   8.023552,   7.461508],
-		         [ 11.782228,  11.711423,  12.625309,  11.727549,  10.917095]]
+    assert_array_almost_equal(model.centroids, centroids)
 
-	assert_array_almost_equal(model.centroids, centroids)
 
+def test_kmeans_nan_fit_large_parallel(five_dimensions):
+    X, X_nan, centroids, model = five_dimensions
+    model.fit(X_nan, n_jobs=2)
 
-@with_setup(setup_five_dimensions)
-def test_kmeans_nan_fit_large_parallel():
-	model.fit(X_nan, n_jobs=2)
+    centroids = [[ -0.187485,   1.541443,  -0.331161,   1.00714 ,  -0.214419],
+                 [  3.393221,   4.200322,   4.027316,   4.25569 ,   3.986697],
+                 [  8.292196,   8.401983,   7.798085,   8.023552,   7.461508],
+                 [ 11.782228,  11.711423,  12.625309,  11.727549,  10.917095]]
 
-	centroids = [[ -0.187485,   1.541443,  -0.331161,   1.00714 ,  -0.214419],
-		         [  3.393221,   4.200322,   4.027316,   4.25569 ,   3.986697],
-		         [  8.292196,   8.401983,   7.798085,   8.023552,   7.461508],
-		         [ 11.782228,  11.711423,  12.625309,  11.727549,  10.917095]]
+    assert_array_almost_equal(model.centroids, centroids)
 
-	assert_array_almost_equal(model.centroids, centroids)
 
+def test_kmeans_nan_predict(three_dimensions):
+    X, X_nan, centroids, model = three_dimensions
+    y = numpy.array([0, 1, 0, 1, 0, 1, 0, 1, 0, 1])
+    y_hat = model.predict(X_nan)
 
-@with_setup(setup_three_dimensions)
-def test_kmeans_nan_predict():
-	y = numpy.array([0, 1, 0, 1, 0, 1, 0, 1, 0, 1])
-	y_hat = model.predict(X_nan)
+    assert_array_almost_equal(y, y_hat)
 
-	assert_array_almost_equal(y, y_hat)
 
+def test_kmeans_nan_predict_parallel(three_dimensions):
+    X, X_nan, centroids, model = three_dimensions
+    y = numpy.array([0, 1, 0, 1, 0, 1, 0, 1, 0, 1])
+    y_hat = model.predict(X_nan, n_jobs=2)
 
-@with_setup(setup_three_dimensions)
-def test_kmeans_nan_predict_parallel():
-	y = numpy.array([0, 1, 0, 1, 0, 1, 0, 1, 0, 1])
-	y_hat = model.predict(X_nan, n_jobs=2)
+    assert_array_almost_equal(y, y_hat)
 
-	assert_array_almost_equal(y, y_hat)
 
+def test_kmeans_nan_large_predict(five_dimensions):
+    X, X_nan, centroids, model = five_dimensions
+    y = numpy.array([0, 1, 2, 3, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3])
+    y_hat = model.predict(X_nan)
 
-@with_setup(setup_five_dimensions)
-def test_kmeans_nan_large_predict():
-	y = numpy.array([0, 1, 2, 3, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3])
-	y_hat = model.predict(X_nan)
+    assert_array_almost_equal(y, y_hat)
 
-	assert_array_almost_equal(y, y_hat)
 
+def test_kmeans_nan_large_predict_parallel(five_dimensions):
+    X, X_nan, centroids, model = five_dimensions
+    y = numpy.array([0, 1, 2, 3, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3])
+    y_hat = model.predict(X_nan, n_jobs=2)
 
-@with_setup(setup_five_dimensions)
-def test_kmeans_nan_large_predict_parallel():
-	y = numpy.array([0, 1, 2, 3, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3])
-	y_hat = model.predict(X_nan, n_jobs=2)
+    assert_array_almost_equal(y, y_hat)
 
-	assert_array_almost_equal(y, y_hat)
 
+def test_kmeans_nan_multiple_init(five_dimensions):
+    X, X_nan, centroids, model = five_dimensions
+    numpy.random.seed(0)
+    model1 = Kmeans.from_samples(4, X_nan, init='kmeans++', n_init=1)
 
-@with_setup(setup_five_dimensions)
-def test_kmeans_nan_multiple_init():
-	numpy.random.seed(0)
-	model1 = Kmeans.from_samples(4, X_nan, init='kmeans++', n_init=1)
-	
-	numpy.random.seed(0)
-	model2 = Kmeans.from_samples(4, X_nan, init='kmeans++', n_init=25)
+    numpy.random.seed(0)
+    model2 = Kmeans.from_samples(4, X_nan, init='kmeans++', n_init=25)
 
-	dist1 = model1.distance(X).min(axis=1).sum()
-	dist2 = model2.distance(X).min(axis=1).sum()
+    dist1 = model1.distance(X).min(axis=1).sum()
+    dist2 = model2.distance(X).min(axis=1).sum()
 
-	assert_greater_equal(dist1, dist2)
+    assert dist1 >= dist2
 
-	model1 = Kmeans.from_samples(4, X_nan, init='first-k', n_init=1)
-	model2 = Kmeans.from_samples(4, X_nan, init='first-k', n_init=5)
+    model1 = Kmeans.from_samples(4, X_nan, init='first-k', n_init=1)
+    model2 = Kmeans.from_samples(4, X_nan, init='first-k', n_init=5)
 
-	dist1 = model1.distance(X).min(axis=1).sum()
-	dist2 = model2.distance(X).min(axis=1).sum()
+    dist1 = model1.distance(X).min(axis=1).sum()
+    dist2 = model2.distance(X).min(axis=1).sum()
 
-	assert_equal(dist1, dist2)
+    assert dist1 == dist2
 
 
-@with_setup(setup_five_dimensions)
-def test_kmeans_ooc_nan_from_samples():
-	model1 = Kmeans.from_samples(4, X_nan, init='first-k', batch_size=20)
-	model2 = Kmeans.from_samples(4, X_nan, init='first-k', batch_size=None)
+def test_kmeans_ooc_nan_from_samples(five_dimensions):
+    X, X_nan, centroids, model = five_dimensions
+    model1 = Kmeans.from_samples(4, X_nan, init='first-k', batch_size=20)
+    model2 = Kmeans.from_samples(4, X_nan, init='first-k', batch_size=None)
 
-	assert_array_almost_equal(model1.centroids, model2.centroids)
+    assert_array_almost_equal(model1.centroids, model2.centroids)
 
 
-@with_setup(setup_five_dimensions)
-def test_kmeans_ooc_nan_fit():
-	centroids_copy = numpy.copy(centroids)
-	model1 = Kmeans(4, centroids_copy, n_init=1)
-	model1.fit(X_nan)
+def test_kmeans_ooc_nan_fit(five_dimensions):
+    X, X_nan, centroids, model = five_dimensions
+    centroids_copy = numpy.copy(centroids)
+    model1 = Kmeans(4, centroids_copy, n_init=1)
+    model1.fit(X_nan)
 
-	centroids_copy = numpy.copy(centroids)
-	model2 = Kmeans(4, centroids_copy, n_init=1)
-	model2.fit(X_nan, batch_size=10)
+    centroids_copy = numpy.copy(centroids)
+    model2 = Kmeans(4, centroids_copy, n_init=1)
+    model2.fit(X_nan, batch_size=10)
 
-	centroids_copy = numpy.copy(centroids)
-	model3 = Kmeans(4, centroids_copy, n_init=1)
-	model3.fit(X_nan, batch_size=1)
+    centroids_copy = numpy.copy(centroids)
+    model3 = Kmeans(4, centroids_copy, n_init=1)
+    model3.fit(X_nan, batch_size=1)
 
-	assert_array_almost_equal(model1.centroids, model2.centroids, 4)
-	assert_array_almost_equal(model1.centroids, model3.centroids, 4)
+    assert_array_almost_equal(model1.centroids, model2.centroids, 4)
+    assert_array_almost_equal(model1.centroids, model3.centroids, 4)
 
 
-@with_setup(setup_five_dimensions)
-def test_kmeans_minibatch_nan_from_samples():
-	model1 = Kmeans.from_samples(4, X_nan, init='first-k', batch_size=10)
-	model2 = Kmeans.from_samples(4, X_nan, init='first-k', batch_size=None)
-	model3 = Kmeans.from_samples(4, X_nan, init='first-k', batch_size=10, batches_per_epoch=1)
-	model4 = Kmeans.from_samples(4, X_nan, init='first-k', batch_size=10, batches_per_epoch=2)
+def test_kmeans_minibatch_nan_from_samples(five_dimensions):
+    X, X_nan, centroids, model = five_dimensions
+    model1 = Kmeans.from_samples(4, X_nan, init='first-k', batch_size=10)
+    model2 = Kmeans.from_samples(4, X_nan, init='first-k', batch_size=None)
+    model3 = Kmeans.from_samples(4, X_nan, init='first-k', batch_size=10, batches_per_epoch=1)
+    model4 = Kmeans.from_samples(4, X_nan, init='first-k', batch_size=10, batches_per_epoch=2)
 
-	assert_array_almost_equal(model1.centroids, model2.centroids)
-	assert_array_almost_equal(model1.centroids, model4.centroids)
-	assert_raises(AssertionError, assert_array_equal, model1.centroids, model3.centroids)
+    assert_array_almost_equal(model1.centroids, model2.centroids)
+    assert_array_almost_equal(model1.centroids, model4.centroids)
+    with pytest.raises(AssertionError):
+        assert_array_equal(model1.centroids, model3.centroids)
 
 
-@with_setup(setup_five_dimensions)
-def test_kmeans_minibatch_nan_fit():
-	centroids_copy = numpy.copy(centroids)
-	model1 = Kmeans(4, centroids_copy, n_init=1)
-	model1.fit(X, batch_size=10)
+def test_kmeans_minibatch_nan_fit(five_dimensions):
+    X, X_nan, centroids, model = five_dimensions
+    centroids_copy = numpy.copy(centroids)
+    model1 = Kmeans(4, centroids_copy, n_init=1)
+    model1.fit(X, batch_size=10)
 
-	centroids_copy = numpy.copy(centroids)
-	model2 = Kmeans(4, centroids_copy, n_init=1)
-	model2.fit(X, batch_size=None)
+    centroids_copy = numpy.copy(centroids)
+    model2 = Kmeans(4, centroids_copy, n_init=1)
+    model2.fit(X, batch_size=None)
 
-	centroids_copy = numpy.copy(centroids)
-	model3 = Kmeans(4, centroids_copy, n_init=1)
-	model3.fit(X, batch_size=10, batches_per_epoch=1)
+    centroids_copy = numpy.copy(centroids)
+    model3 = Kmeans(4, centroids_copy, n_init=1)
+    model3.fit(X, batch_size=10, batches_per_epoch=1)
 
-	centroids_copy = numpy.copy(centroids)
-	model4 = Kmeans(4, centroids_copy, n_init=1)
-	model4.fit(X, batch_size=10, batches_per_epoch=2)
+    centroids_copy = numpy.copy(centroids)
+    model4 = Kmeans(4, centroids_copy, n_init=1)
+    model4.fit(X, batch_size=10, batches_per_epoch=2)
 
-	assert_array_almost_equal(model1.centroids, model2.centroids)
-	assert_array_almost_equal(model1.centroids, model4.centroids)
-	assert_raises(AssertionError, assert_array_equal, model1.centroids, model3.centroids)
+    assert_array_almost_equal(model1.centroids, model2.centroids)
+    assert_array_almost_equal(model1.centroids, model4.centroids)
+    with pytest.raises(AssertionError):
+        assert_array_equal(model1.centroids, model3.centroids)
--- python-pomegranate.orig/tests/test_markov_network.py
+++ python-pomegranate/tests/test_markov_network.py
@@ -9,26 +9,17 @@
 
 from pomegranate import JointProbabilityTable
 from pomegranate import MarkovNetwork
-from pomegranate.io import DataGenerator
-from pomegranate.io import DataFrameGenerator
 
-from .tools import with_setup
-from .tools import assert_equal
-from .tools import assert_raises
-from .tools import assert_true
-from .tools import assert_almost_equal
+from .assert_tools import assert_almost_equal
 
 from numpy.testing import assert_array_equal
-from numpy.testing import assert_array_almost_equal
 
-import pandas
-import random, numpy
-import sys
-
-def setup_markov_network_int():
-	global d1, d2, d3
-	global model1, model2, model3, model4
+import numpy
+import pytest
 
+
+@pytest.fixture
+def markov_network_int():
 	d1 = JointProbabilityTable([
 		[0, 0, 0.1],
 		[0, 1, 0.2],
@@ -66,11 +57,11 @@
 
 	model4 = MarkovNetwork([d1, d3])
 	model4.bake()
+	return d1, d2, d3, model1, model2, model3, model4
 
-def setup_markov_network_str():
-	global d1, d2, d3
-	global model1, model2, model3, model4
 
+@pytest.fixture
+def markov_network_str():
 	d1 = JointProbabilityTable([
 		['0', '0', 0.1],
 		['0', '1', 0.2],
@@ -108,11 +99,11 @@
 
 	model4 = MarkovNetwork([d1, d3])
 	model4.bake()
+	return d1, d2, d3, model1, model2, model3, model4
 
-def setup_markov_network_bool():
-	global d1, d2, d3
-	global model1, model2, model3, model4
 
+@pytest.fixture
+def markov_network_bool():
 	d1 = JointProbabilityTable([
 		[False, False, 0.1],
 		[False, True,  0.2],
@@ -150,11 +141,11 @@
 
 	model4 = MarkovNetwork([d1, d3])
 	model4.bake()
+	return d1, d2, d3, model1, model2, model3, model4
 
-def setup_markov_network_mixed():
-	global d1, d2, d3
-	global model1, model2, model3, model4
 
+@pytest.fixture
+def markov_network_mixed():
 	d1 = JointProbabilityTable([
 		[False, 'blue', 0.1],
 		[False, 'red',  0.2],
@@ -192,12 +183,12 @@
 
 	model4 = MarkovNetwork([d1, d3])
 	model4.bake()
+	return d1, d2, d3, model1, model2, model3, model4
 
-def teardown():
-	pass
 
 def test_initialize():
-	assert_raises(ValueError, MarkovNetwork, [])
+	with pytest.raises(ValueError):
+		MarkovNetwork([])
 
 	d1 = JointProbabilityTable([
 		[0, 0, 0.2],
@@ -207,37 +198,37 @@
 
 	model = MarkovNetwork([d1])
 
-@with_setup(setup_markov_network_int, teardown)
-def test_structure():
-	assert_equal(model1.structure, ((0, 1),))
-	assert_equal(model2.structure, ((0, 1), (1, 2, 3)))
-	assert_equal(model3.structure, ((0, 1), (1, 2, 3), (2, 3, 4)))
-	assert_equal(model4.structure, ((0, 1), (2, 3, 4)))
+def test_structure(markov_network_int):
+	d1, d2, d3, model1, model2, model3, model4 = markov_network_int
+	assert model1.structure == ((0, 1),)
+	assert model2.structure == ((0, 1), (1, 2, 3))
+	assert model3.structure == ((0, 1), (1, 2, 3), (2, 3, 4))
+	assert model4.structure == ((0, 1), (2, 3, 4))
 
-@with_setup(setup_markov_network_int, teardown)
-def test_partition():
+def test_partition(markov_network_int):
+	d1, d2, d3, model1, model2, model3, model4 = markov_network_int
 	model3.bake()
-	assert_true(model3.partition != float("inf"))
+	assert model3.partition != float("inf")
 
 	model3.bake(calculate_partition=False)
-	assert_true(model3.partition == float("inf"))
+	assert model3.partition == float("inf")
 
-@with_setup(setup_markov_network_int, teardown)
-def test_d():
-	assert_equal(model1.d, 2)
-	assert_equal(model2.d, 4)
-	assert_equal(model3.d, 5)
-	assert_equal(model4.d, 5)
-
-@with_setup(setup_markov_network_mixed, teardown)
-def test_d_mixed():
-	assert_equal(model1.d, 2)
-	assert_equal(model2.d, 4)
-	assert_equal(model3.d, 5)
-	assert_equal(model4.d, 5)
+def test_d(markov_network_int):
+	d1, d2, d3, model1, model2, model3, model4 = markov_network_int
+	assert model1.d == 2
+	assert model2.d == 4
+	assert model3.d == 5
+	assert model4.d == 5
+
+def test_d_mixed(markov_network_mixed):
+	d1, d2, d3, model1, model2, model3, model4 = markov_network_mixed
+	assert model1.d == 2
+	assert model2.d == 4
+	assert model3.d == 5
+	assert model4.d == 5
 
-@with_setup(setup_markov_network_int, teardown)
-def test_log_probability_int():
+def test_log_probability_int(markov_network_int):
+	d1, d2, d3, model1, model2, model3, model4 = markov_network_int
 	x = [1, 0]
 	logp1 = model1.log_probability(x)
 	logp2 = d1.log_probability(x)
@@ -249,7 +240,8 @@
 	logp1 = model2.log_probability(x)
 	logp2 = d1.log_probability(x[:2]) + d2.log_probability(x[1:])
 
-	assert_raises(AssertionError, assert_almost_equal, logp1, logp2)
+	with pytest.raises(AssertionError):
+		assert_almost_equal(logp1, logp2)
 	assert_almost_equal(logp1, -3.7297014467295373)
 	
 	x = [1, 0, 1, 0, 1]
@@ -257,7 +249,8 @@
 	logp2 = (d1.log_probability(x[:2]) + d2.log_probability(x[1:4])
 		+ d3.log_probability(x[2:]))
 
-	assert_raises(AssertionError, assert_almost_equal, logp1, logp2)
+	with pytest.raises(AssertionError):
+		assert_almost_equal(logp1, logp2)
 	assert_almost_equal(logp1, -4.429966143312331)
 
 	logp3 = model4.log_probability(x)
@@ -265,10 +258,11 @@
 
 	assert_almost_equal(logp3, logp4)
 	assert_almost_equal(logp3, -3.7297014486341915)
-	assert_raises(AssertionError, assert_almost_equal, logp1, logp3)
+	with pytest.raises(AssertionError):
+		assert_almost_equal(logp1, logp3)
 
-@with_setup(setup_markov_network_str, teardown)
-def test_log_probability_str():
+def test_log_probability_str(markov_network_str):
+	d1, d2, d3, model1, model2, model3, model4 = markov_network_str
 	x = ['1', '0']
 	logp1 = model1.log_probability(x)
 	logp2 = d1.log_probability(x)
@@ -280,7 +274,8 @@
 	logp1 = model2.log_probability(x)
 	logp2 = d1.log_probability(x[:2]) + d2.log_probability(x[1:])
 
-	assert_raises(AssertionError, assert_almost_equal, logp1, logp2)
+	with pytest.raises(AssertionError):
+		assert_almost_equal(logp1, logp2)
 	assert_almost_equal(logp1, -3.7297014467295373)
 	
 	x = ['1', '0', '1', '0', '1']
@@ -288,7 +283,8 @@
 	logp2 = (d1.log_probability(x[:2]) + d2.log_probability(x[1:4])
 		+ d3.log_probability(x[2:]))
 
-	assert_raises(AssertionError, assert_almost_equal, logp1, logp2)
+	with pytest.raises(AssertionError):
+		assert_almost_equal(logp1, logp2)
 	assert_almost_equal(logp1, -4.429966143312331)
 
 	logp3 = model4.log_probability(x)
@@ -296,10 +292,11 @@
 
 	assert_almost_equal(logp3, logp4)
 	assert_almost_equal(logp3, -3.7297014486341915)
-	assert_raises(AssertionError, assert_almost_equal, logp1, logp3)
+	with pytest.raises(AssertionError):
+		assert_almost_equal(logp1, logp3)
 
-@with_setup(setup_markov_network_bool, teardown)
-def test_log_probability_bool():
+def test_log_probability_bool(markov_network_bool):
+	d1, d2, d3, model1, model2, model3, model4 = markov_network_bool
 	x = [True, False]
 	logp1 = model1.log_probability(x)
 	logp2 = d1.log_probability(x)
@@ -311,7 +308,8 @@
 	logp1 = model2.log_probability(x)
 	logp2 = d1.log_probability(x[:2]) + d2.log_probability(x[1:])
 
-	assert_raises(AssertionError, assert_almost_equal, logp1, logp2)
+	with pytest.raises(AssertionError):
+		assert_almost_equal(logp1, logp2)
 	assert_almost_equal(logp1, -3.7297014467295373)
 	
 	x = [True, False, True, False, True]
@@ -319,7 +317,8 @@
 	logp2 = (d1.log_probability(x[:2]) + d2.log_probability(x[1:4])
 		+ d3.log_probability(x[2:]))
 
-	assert_raises(AssertionError, assert_almost_equal, logp1, logp2)
+	with pytest.raises(AssertionError):
+		assert_almost_equal(logp1, logp2)
 	assert_almost_equal(logp1, -4.429966143312331)
 
 	logp3 = model4.log_probability(x)
@@ -327,10 +326,11 @@
 
 	assert_almost_equal(logp3, logp4)
 	assert_almost_equal(logp3, -3.7297014486341915)
-	assert_raises(AssertionError, assert_almost_equal, logp1, logp3)
+	with pytest.raises(AssertionError):
+		assert_almost_equal(logp1, logp3)
 
-@with_setup(setup_markov_network_mixed, teardown)
-def test_log_probability_mixed():
+def test_log_probability_mixed(markov_network_mixed):
+	d1, d2, d3, model1, model2, model3, model4 = markov_network_mixed
 	x = [True, 'blue']
 
 	logp1 = model1.log_probability(x)
@@ -343,7 +343,8 @@
 	logp1 = model2.log_probability(x)
 	logp2 = d1.log_probability(x[:2]) + d2.log_probability(x[1:])
 
-	assert_raises(AssertionError, assert_almost_equal, logp1, logp2)
+	with pytest.raises(AssertionError):
+		assert_almost_equal(logp1, logp2)
 	assert_almost_equal(logp1, -3.7297014467295373)
 	
 	x = [1, 'blue', True, 0, 'b']
@@ -351,7 +352,8 @@
 	logp2 = (d1.log_probability(x[:2]) + d2.log_probability(x[1:4])
 		+ d3.log_probability(x[2:]))
 
-	assert_raises(AssertionError, assert_almost_equal, logp1, logp2)
+	with pytest.raises(AssertionError):
+		assert_almost_equal(logp1, logp2)
 	assert_almost_equal(logp1, -4.429966143312331)
 
 	logp3 = model4.log_probability(x)
@@ -359,10 +361,11 @@
 
 	assert_almost_equal(logp3, logp4)
 	assert_almost_equal(logp3, -3.7297014486341915)
-	assert_raises(AssertionError, assert_almost_equal, logp1, logp3)
+	with pytest.raises(AssertionError):
+		assert_almost_equal(logp1, logp3)
 
-@with_setup(setup_markov_network_int, teardown)
-def test_log_probability_unnormalized_int():
+def test_log_probability_unnormalized_int(markov_network_int):
+	d1, d2, d3, model1, model2, model3, model4 = markov_network_int
 	x = [1, 0]
 	logp1 = model1.log_probability(x, unnormalized=True)
 	logp2 = d1.log_probability(x)
@@ -390,10 +393,11 @@
 
 	assert_almost_equal(logp3, logp4)
 	assert_almost_equal(logp3, numpy.log(0.4 * 0.06))
-	assert_raises(AssertionError, assert_almost_equal, logp1, logp3)
+	with pytest.raises(AssertionError):
+		assert_almost_equal(logp1, logp3)
 
-@with_setup(setup_markov_network_str, teardown)
-def test_log_probability_unnormalized_str():
+def test_log_probability_unnormalized_str(markov_network_str):
+	d1, d2, d3, model1, model2, model3, model4 = markov_network_str
 	x = ['1', '0']
 	logp1 = model1.log_probability(x, unnormalized=True)
 	logp2 = d1.log_probability(x)
@@ -421,10 +425,11 @@
 
 	assert_almost_equal(logp3, logp4)
 	assert_almost_equal(logp3, numpy.log(0.4 * 0.06))
-	assert_raises(AssertionError, assert_almost_equal, logp1, logp3)
+	with pytest.raises(AssertionError):
+		assert_almost_equal(logp1, logp3)
 
-@with_setup(setup_markov_network_bool, teardown)
-def test_log_probability_unnormalized_bool():
+def test_log_probability_unnormalized_bool(markov_network_bool):
+	d1, d2, d3, model1, model2, model3, model4 = markov_network_bool
 	x = [True, False]
 	logp1 = model1.log_probability(x, unnormalized=True)
 	logp2 = d1.log_probability(x)
@@ -452,10 +457,11 @@
 
 	assert_almost_equal(logp3, logp4)
 	assert_almost_equal(logp3, numpy.log(0.4 * 0.06))
-	assert_raises(AssertionError, assert_almost_equal, logp1, logp3)
+	with pytest.raises(AssertionError):
+		assert_almost_equal(logp1, logp3)
 
-@with_setup(setup_markov_network_mixed, teardown)
-def test_log_probability_unnormalized_mixed():
+def test_log_probability_unnormalized_mixed(markov_network_mixed):
+	d1, d2, d3, model1, model2, model3, model4 = markov_network_mixed
 	x = [True, 'blue']
 	logp1 = model1.log_probability(x, unnormalized=True)
 	logp2 = d1.log_probability(x)
@@ -483,10 +489,11 @@
 
 	assert_almost_equal(logp3, logp4)
 	assert_almost_equal(logp3, numpy.log(0.4 * 0.06))
-	assert_raises(AssertionError, assert_almost_equal, logp1, logp3)
+	with pytest.raises(AssertionError):
+		assert_almost_equal(logp1, logp3)
 
-@with_setup(setup_markov_network_int, teardown)
-def test_predict_int():
+def test_predict_int(markov_network_int):
+	d1, d2, d3, model1, model2, model3, model4 = markov_network_int
 	assert_array_equal(model1.predict([[1, None]]), [[1, 0]])
 	assert_array_equal(model1.predict([[None, 1]]), [[1, 1]])
 
@@ -507,8 +514,8 @@
 	assert_array_equal(model3.predict([[None, None, None, None, 1]]), 
 		[[1, 1, 0, 1, 1]])
 
-@with_setup(setup_markov_network_str, teardown)
-def test_predict_str():
+def test_predict_str(markov_network_str):
+	d1, d2, d3, model1, model2, model3, model4 = markov_network_str
 	assert_array_equal(model1.predict([['1', None]]), [['1', '0']])
 	assert_array_equal(model1.predict([[None, '1']]), [['1', '1']])
 
@@ -529,8 +536,8 @@
 	assert_array_equal(model3.predict([[None, None, None, None, '1']]), 
 		[['1', '1', '0', '1', '1']])
 
-@with_setup(setup_markov_network_bool, teardown)
-def test_predict_bool():
+def test_predict_bool(markov_network_bool):
+	d1, d2, d3, model1, model2, model3, model4 = markov_network_bool
 	assert_array_equal(model1.predict([[True, None]]), [[True, False]])
 	assert_array_equal(model1.predict([[None, True]]), [[True, True]])
 
@@ -552,8 +559,8 @@
 		[[True, True, False, True, True]])
 
 
-@with_setup(setup_markov_network_mixed, teardown)
-def test_predict_mixed():
+def test_predict_mixed(markov_network_mixed):
+	d1, d2, d3, model1, model2, model3, model4 = markov_network_mixed
 	assert_array_equal(model1.predict([[True, None]]), 
 		numpy.array([[True, 'blue']], dtype=object))
 	assert_array_equal(model1.predict([[None, 'red']]), 
--- python-pomegranate.orig/tests/test_naive_bayes.py
+++ python-pomegranate/tests/test_naive_bayes.py
@@ -4,34 +4,31 @@
 from pomegranate.io import DataGenerator
 from pomegranate.io import DataFrameGenerator
 
-from .tools import with_setup
-from .tools import assert_almost_equal
-from .tools import assert_equal
-from .tools import assert_not_equal
-from .tools import assert_less_equal
-from .tools import assert_raises
-from .tools import assert_true
-from numpy.testing import assert_array_equal
+from .assert_tools import assert_almost_equal
 from numpy.testing import assert_array_almost_equal
 
 import pandas
-import random
 import pickle
 import numpy as np
 
-nan = numpy.nan
+import pytest
 
-def setup_univariate_mixed():
-	normal = NormalDistribution(5, 2)
-	uniform = UniformDistribution(0, 10)
+nan = np.nan
 
-	global model
-	model = NaiveBayes([normal, uniform])
 
-	global X
-	X = numpy.array([[5], [3], [1], [-1]])
+@pytest.fixture
+def univariate_mixed():
+	normal = NormalDistribution(5, 2)
+	uniform = UniformDistribution(0, 10)
 
-def setup_multivariate_gaussian():
+	model = NaiveBayes([normal, uniform])
+
+	X = numpy.array([[5], [3], [1], [-1]])
+	return model, X
+
+
+@pytest.fixture
+def multivariate_gaussian():
 	d11 = NormalDistribution(0.0, 1)
 	d12 = NormalDistribution(0.5, 1)
 	d13 = NormalDistribution(0.3, 1)
@@ -42,28 +39,26 @@
 	d23 = NormalDistribution(1.5, 1)
 	d2 = IndependentComponentsDistribution([d21, d22, d23])
 
-	global model
 	model = NaiveBayes([d1, d2])
 
-	global X
 	X = numpy.array([[0.3, 0.5, 0.1],
 					 [0.8, 1.4, 0.5],
 					 [1.4, 2.6, 1.8],
 					 [4.2, 3.3, 3.7],
 					 [2.6, 3.6, 3.3]])
 
-	global y
 	y = [0, 0, 0, 1, 1]
 
-	global X_nan
 	X_nan = numpy.array([[0.3, nan, 0.1],
 		     			 [nan, 1.4, nan],
 			     		 [1.4, 2.6, nan],
 				    	 [nan, nan, nan],
 					     [nan, 3.6, 3.3]])
+	return model, X, y, X_nan
 
 
-def setup_multivariate_mixed():
+@pytest.fixture
+def multivariate_mixed():
 	d11 = ExponentialDistribution(5)
 	d12 = LogNormalDistribution(0.5, 0.78)
 	d13 = PoissonDistribution(4)
@@ -74,41 +69,34 @@
 	d23 = PoissonDistribution(6)
 	d2 = IndependentComponentsDistribution([d21, d22, d23])
 
-	global model
 	model = NaiveBayes([d1, d2])
 
-	global X
 	X = numpy.array([[0.3, 0.5, 0.1],
 					 [0.8, 1.4, 0.5],
 					 [1.4, 2.6, 1.8],
 					 [4.2, 3.3, 3.7],
 					 [2.6, 3.6, 3.3]])
 
-	global y
 	y = [0, 0, 0, 1, 1]
 
-	global X_nan
 	X_nan = numpy.array([[0.3, nan, 0.1],
 		     			 [nan, 1.4, nan],
 			     		 [1.4, 2.6, nan],
 				    	 [nan, nan, nan],
 					     [nan, 3.6, 3.3]])
+	return model, X, y, X_nan
 
 
-def teardown():
-	pass
-
-
-@with_setup(setup_univariate_mixed, teardown)
-def test_nb_univariate_initialization():
-	assert_equal(model.d, 1)
-	assert_equal(model.n, 2)
+def test_nb_univariate_initialization(univariate_mixed):
+	model, X = univariate_mixed
+	assert model.d == 1
+	assert model.n == 2
 
 
-@with_setup(setup_multivariate_mixed, teardown)
-def test_nb_multivariate_initialization():
-	assert_equal(model.d, 3)
-	assert_equal(model.n, 2)
+def test_nb_multivariate_initialization(multivariate_mixed):
+	model, X, y, X_nan = multivariate_mixed
+	assert model.d == 3
+	assert model.n == 2
 
 
 def test_nb_univariate_constructors():
@@ -117,9 +105,12 @@
 	d3 = IndependentComponentsDistribution([NormalDistribution(0, 1),
 		NormalDistribution(2, 1), NormalDistribution(3, 1)])
 
-	assert_raises(TypeError, NaiveBayes, [d1, d2])
-	assert_raises(TypeError, NaiveBayes, [d1, d3])
-	assert_raises(ValueError, NaiveBayes, [NormalDistribution])
+	with pytest.raises(TypeError):
+		NaiveBayes([d1, d2])
+	with pytest.raises(TypeError):
+		NaiveBayes([d1, d3])
+	with pytest.raises(ValueError):
+		NaiveBayes([NormalDistribution])
 
 
 def test_nb_multivariate_constructors():
@@ -130,14 +121,18 @@
 		NormalDistribution(2, 1)])
 
 	NaiveBayes([d1, d3])
-	assert_raises(TypeError, NaiveBayes, [d2, d3])
-	assert_raises(TypeError, NaiveBayes, [d2, d1])
-	assert_raises(ValueError, NaiveBayes, [MultivariateGaussianDistribution])
-	assert_raises(ValueError, NaiveBayes, [IndependentComponentsDistribution])
+	with pytest.raises(TypeError):
+		NaiveBayes([d2, d3])
+	with pytest.raises(TypeError):
+		NaiveBayes([d2, d1])
+	with pytest.raises(ValueError):
+		NaiveBayes([MultivariateGaussianDistribution])
+	with pytest.raises(ValueError):
+		NaiveBayes([IndependentComponentsDistribution])
 
 
-@with_setup(setup_univariate_mixed, teardown)
-def test_nb_univariate_mixed_predict_log_proba():
+def test_nb_univariate_mixed_predict_log_proba(univariate_mixed):
+	model, X = univariate_mixed
 	y_hat = model.predict_log_proba(X)
 	y = [[-0.4063484, -1.096847],
 	     [-0.6024268, -0.792926],
@@ -147,8 +142,8 @@
 	assert_array_almost_equal(y, y_hat)
 
 
-@with_setup(setup_multivariate_gaussian, teardown)
-def test_nb_multivariate_gaussian_predict_log_proba():
+def test_nb_multivariate_gaussian_predict_log_proba(multivariate_gaussian):
+	model, X, y, X_nan = multivariate_gaussian
 	y_hat = model.predict_log_proba(X)
 	y = [[ -2.194303e-01,    -1.624430e+00],
  		 [ -8.00891133e-01,  -5.95891133e-01],
@@ -159,8 +154,8 @@
 	assert_array_almost_equal(y, y_hat)
 
 
-@with_setup(setup_multivariate_mixed, teardown)
-def test_nb_multivariate_mixed_predict_log_proba():
+def test_nb_multivariate_mixed_predict_log_proba(multivariate_mixed):
+	model, X, y, X_nan = multivariate_mixed
 	y_hat = model.predict_log_proba(X)
 	y = [[ -3.96979060e-05,  -1.01342320e+01],
 		 [ -1.43325352e-11,  -2.49684574e+01],
@@ -171,8 +166,8 @@
 	assert_array_almost_equal(y, y_hat)
 
 
-@with_setup(setup_multivariate_gaussian, teardown)
-def test_nb_multivariate_gaussian_nan_predict_log_proba():
+def test_nb_multivariate_gaussian_nan_predict_log_proba(multivariate_gaussian):
+	model, X, y, X_nan = multivariate_gaussian
 	y_hat = model.predict_log_proba(X_nan)
 	y = [[-0.27268481, -1.43268481],
 		 [-0.90406199, -0.51906199],
@@ -183,8 +178,8 @@
 	assert_array_almost_equal(y, y_hat)
 
 
-@with_setup(setup_multivariate_mixed, teardown)
-def test_nb_multivariate_mixed_nan_predict_log_proba():
+def test_nb_multivariate_mixed_nan_predict_log_proba(multivariate_mixed):
+	model, X, y, X_nan = multivariate_mixed
 	y_hat = model.predict_log_proba(X_nan)
 	y = [[ -1.21742279e-04,  -9.01366508e+00],
 		 [ -2.83092062e-01,  -1.40019217e+00],
@@ -195,8 +190,8 @@
 	assert_array_almost_equal(y, y_hat)
 
 
-@with_setup(setup_univariate_mixed, teardown)
-def test_nb_univariate_mixed_predict_log_proba_parallel():
+def test_nb_univariate_mixed_predict_log_proba_parallel(univariate_mixed):
+	model, X = univariate_mixed
 	y_hat = model.predict_log_proba(X, n_jobs=2)
 	y = [[-0.4063484, -1.096847],
 	     [-0.6024268, -0.792926],
@@ -206,8 +201,8 @@
 	assert_array_almost_equal(y, y_hat)
 
 
-@with_setup(setup_multivariate_gaussian, teardown)
-def test_nb_multivariate_gaussian_predict_log_proba_parallel():
+def test_nb_multivariate_gaussian_predict_log_proba_parallel(multivariate_gaussian):
+	model, X, y, X_nan = multivariate_gaussian
 	y_hat = model.predict_log_proba(X, n_jobs=2)
 	y = [[ -2.194303e-01,    -1.624430e+00],
  		 [ -8.00891133e-01,  -5.95891133e-01],
@@ -218,8 +213,8 @@
 	assert_array_almost_equal(y, y_hat)
 
 
-@with_setup(setup_multivariate_mixed, teardown)
-def test_nb_multivariate_mixed_predict_log_proba_parallel():
+def test_nb_multivariate_mixed_predict_log_proba_parallel(multivariate_mixed):
+	model, X, y, X_nan = multivariate_mixed
 	y_hat = model.predict_log_proba(X, n_jobs=2)
 	y = [[ -3.96979060e-05,  -1.01342320e+01],
 		 [ -1.43325352e-11,  -2.49684574e+01],
@@ -230,8 +225,8 @@
 	assert_array_almost_equal(y, y_hat)
 
 
-@with_setup(setup_univariate_mixed, teardown)
-def test_nb_univariate_mixed_predict_proba():
+def test_nb_univariate_mixed_predict_proba(univariate_mixed):
+	model, X = univariate_mixed
 	y_hat = model.predict_proba(X)
 	y = [[ 0.66607801,  0.33392199],
 		 [ 0.54748134,  0.45251866],
@@ -241,8 +236,8 @@
 	assert_array_almost_equal(y, y_hat)
 
 
-@with_setup(setup_multivariate_gaussian, teardown)
-def test_nb_multivariate_gaussian_predict_proba():
+def test_nb_multivariate_gaussian_predict_proba(multivariate_gaussian):
+	model, X, y, X_nan = multivariate_gaussian
 	y_hat = model.predict_proba(X)
 	y =	[[  8.02976114e-01,   1.97023886e-01],
 		 [  4.48928731e-01,   5.51071269e-01],
@@ -253,8 +248,8 @@
 	assert_array_almost_equal(y, y_hat)
 
 
-@with_setup(setup_multivariate_mixed, teardown)
-def test_nb_multivariate_mixed_predict_proba():
+def test_nb_multivariate_mixed_predict_proba(multivariate_mixed):
+	model, X, y, X_nan = multivariate_mixed
 	y_hat = model.predict_proba(X)
 	y = [[  9.99960303e-01,   3.96971181e-05],
 		 [  1.00000000e+00,   1.43329876e-11],
@@ -265,8 +260,8 @@
 	assert_array_almost_equal(y, y_hat)
 
 
-@with_setup(setup_multivariate_gaussian, teardown)
-def test_nb_multivariate_gaussian_nan_predict_proba():
+def test_nb_multivariate_gaussian_nan_predict_proba(multivariate_gaussian):
+	model, X, y, X_nan = multivariate_gaussian
 	y_hat = model.predict_proba(X_nan)
 	y = [[ 0.76133271,  0.23866729],
 		 [ 0.40492153,  0.59507847],
@@ -277,8 +272,8 @@
 	assert_array_almost_equal(y, y_hat)
 
 
-@with_setup(setup_multivariate_mixed, teardown)
-def test_nb_multivariate_mixed_nan_predict_proba():
+def test_nb_multivariate_mixed_nan_predict_proba(multivariate_mixed):
+	model, X, y, X_nan = multivariate_mixed
 	y_hat = model.predict_proba(X_nan)
 	y = [[  9.99878265e-01,   1.21734869e-04],
 		 [  7.53450421e-01,   2.46549579e-01],
@@ -289,8 +284,8 @@
 	assert_array_almost_equal(y, y_hat)
 
 
-@with_setup(setup_univariate_mixed, teardown)
-def test_nb_univariate_mixed_predict_proba_parallel():
+def test_nb_univariate_mixed_predict_proba_parallel(univariate_mixed):
+	model, X = univariate_mixed
 	y_hat = model.predict_proba(X, n_jobs=2)
 	y =  [[ 0.66607801,  0.33392199],
 		  [ 0.54748134,  0.45251866],
@@ -300,8 +295,8 @@
 	assert_array_almost_equal(y, y_hat)
 
 
-@with_setup(setup_multivariate_gaussian, teardown)
-def test_nb_multivariate_gaussian_predict_proba_parallel():
+def test_nb_multivariate_gaussian_predict_proba_parallel(multivariate_gaussian):
+	model, X, y, X_nan = multivariate_gaussian
 	y_hat = model.predict_proba(X, n_jobs=2)
 	y = [[  8.02976114e-01,   1.97023886e-01],
 		 [  4.48928731e-01,   5.51071269e-01],
@@ -312,8 +307,8 @@
 	assert_array_almost_equal(y, y_hat)
 
 
-@with_setup(setup_multivariate_mixed, teardown)
-def test_nb_multivariate_mixed_predict_proba_parallel():
+def test_nb_multivariate_mixed_predict_proba_parallel(multivariate_mixed):
+	model, X, y, X_nan = multivariate_mixed
 	y_hat = model.predict_proba(X, n_jobs=2)
 	y = [[  9.99960303e-01,   3.96971181e-05,],
 		 [  1.00000000e+00,   1.43329876e-11,],
@@ -324,72 +319,72 @@
 	assert_array_almost_equal(y, y_hat)
 
 
-@with_setup(setup_univariate_mixed, teardown)
-def test_nb_univariate_mixed_predict():
+def test_nb_univariate_mixed_predict(univariate_mixed):
+	model, X = univariate_mixed
 	y_hat = model.predict(X)
 	y = [0, 0, 1, 0]
 
 	assert_array_almost_equal(y, y_hat)
 
 
-@with_setup(setup_multivariate_gaussian, teardown)
-def test_nb_multivariate_gaussian_predict():
+def test_nb_multivariate_gaussian_predict(multivariate_gaussian):
+	model, X, y, X_nan = multivariate_gaussian
 	y_hat = model.predict(X)
 	y = [0, 1, 1, 1, 1]
 
 	assert_array_almost_equal(y, y_hat)
 
 
-@with_setup(setup_multivariate_mixed, teardown)
-def test_nb_multivariate_mixed_predict():
+def test_nb_multivariate_mixed_predict(multivariate_mixed):
+	model, X, y, X_nan = multivariate_mixed
 	y_hat = model.predict(X)
 	y = [0, 0, 0, 0, 0]
 
 	assert_array_almost_equal(y, y_hat)
 
 
-@with_setup(setup_multivariate_gaussian, teardown)
-def test_nb_multivariate_gaussian_nan_predict():
+def test_nb_multivariate_gaussian_nan_predict(multivariate_gaussian):
+	model, X, y, X_nan = multivariate_gaussian
 	y_hat = model.predict(X_nan)
 	y = [0, 1, 1, 0, 1]
 
 	assert_array_almost_equal(y, y_hat)
 
 
-@with_setup(setup_multivariate_mixed, teardown)
-def test_nb_multivariate_mixed_nan_predict():
+def test_nb_multivariate_mixed_nan_predict(multivariate_mixed):
+	model, X, y, X_nan = multivariate_mixed
 	y_hat = model.predict(X_nan)
 	y = [0, 0, 0, 0, 0]
 
 	assert_array_almost_equal(y, y_hat)
 
 
-@with_setup(setup_univariate_mixed, teardown)
-def test_nb_univariate_mixed_predict_parallel():
+def test_nb_univariate_mixed_predict_parallel(univariate_mixed):
+	model, X = univariate_mixed
 	y_hat = model.predict(X, n_jobs=2)
 	y = [0, 0, 1, 0]
 
 	assert_array_almost_equal(y, y_hat)
 
 
-@with_setup(setup_multivariate_gaussian, teardown)
-def test_nb_multivariate_gaussian_predict_parallel():
+def test_nb_multivariate_gaussian_predict_parallel(multivariate_gaussian):
+	model, X, y, X_nan = multivariate_gaussian
 	y_hat = model.predict(X, n_jobs=2)
 	y = [0, 1, 1, 1, 1]
 
 	assert_array_almost_equal(y, y_hat)
 
 
-@with_setup(setup_multivariate_mixed, teardown)
-def test_nb_multivariate_mixed_predict_parallel():
+def test_nb_multivariate_mixed_predict_parallel(multivariate_mixed):
+	model, X, y, X_nan = multivariate_mixed
 	y_hat = model.predict(X, n_jobs=2)
 	y = [0, 0, 0, 0, 0]
 
 	assert_array_almost_equal(y, y_hat)
 
 
-@with_setup(setup_univariate_mixed, teardown)
-def test_nb_univariate_mixed_fit():
+def test_nb_univariate_mixed_fit(univariate_mixed):
+	model, X = univariate_mixed
 	X = np.array([5, 4, 5, 4, 6, 5, 6, 5, 4, 6, 5, 4, 0, 0,
 		1, 9, 8, 2, 0, 1, 1, 8, 10, 0]).reshape(-1, 1)
 	y = np.array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1,
@@ -403,8 +398,8 @@
 	assert_array_almost_equal(d1.parameters, [4.916666666666667, 0.7592027982620252])
 	assert_array_almost_equal(d2.parameters, [0.0, 10.0])
 
-@with_setup(setup_multivariate_gaussian, teardown)
-def test_nb_multivariate_gaussian_fit():
+def test_nb_multivariate_gaussian_fit(multivariate_gaussian):
+	model, X, y, X_nan = multivariate_gaussian
 	model.fit(X, y)
 
 	d11 = model.distributions[0].distributions[0]
@@ -422,8 +417,8 @@
 	assert_array_almost_equal(d23.parameters, [3.5, 0.19999999999999787])
 
 
-@with_setup(setup_multivariate_mixed, teardown)
-def test_nb_multivariate_mixed_fit():
+def test_nb_multivariate_mixed_fit(multivariate_mixed):
+	model, X, y, X_nan = multivariate_mixed
 	model.fit(X, y)
 
 	d11 = model.distributions[0].distributions[0]
@@ -441,8 +436,8 @@
 	assert_array_almost_equal(d23.parameters, [3.5])
 
 
-@with_setup(setup_multivariate_gaussian, teardown)
-def test_nb_multivariate_gaussian_nan_fit():
+def test_nb_multivariate_gaussian_nan_fit(multivariate_gaussian):
+	model, X, y, X_nan = multivariate_gaussian
 	model.fit(X_nan, y)
 
 	d11 = model.distributions[0].distributions[0]
@@ -460,8 +455,8 @@
 	assert_array_almost_equal(d23.parameters, [3.3, 0.0])
 
 
-@with_setup(setup_multivariate_mixed, teardown)
-def test_nb_multivariate_mixed_nan_fit():
+def test_nb_multivariate_mixed_nan_fit(multivariate_mixed):
+	model, X, y, X_nan = multivariate_mixed
 	model.fit(X_nan, y)
 
 	d11 = model.distributions[0].distributions[0]
@@ -479,8 +474,8 @@
 	assert_array_almost_equal(d23.parameters, [3.3])
 
 
-@with_setup(setup_univariate_mixed, teardown)
-def test_nb_univariate_mixed_fit_parallel():
+def test_nb_univariate_mixed_fit_parallel(univariate_mixed):
+	model, X = univariate_mixed
 	X = np.array([5, 4, 5, 4, 6, 5, 6, 5, 4, 6, 5, 4, 0, 0,
 		1, 9, 8, 2, 0, 1, 1, 8, 10, 0]).reshape(-1, 1)
 	y = np.array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1,
@@ -494,8 +489,8 @@
 	assert_array_almost_equal(d1.parameters, [4.916666666666667, 0.7592027982620252])
 	assert_array_almost_equal(d2.parameters, [0.0, 10.0])
 
-@with_setup(setup_multivariate_gaussian, teardown)
-def test_nb_multivariate_gaussian_fit_parallel():
+def test_nb_multivariate_gaussian_fit_parallel(multivariate_gaussian):
+	model, X, y, X_nan = multivariate_gaussian
 	model.fit(X, y, n_jobs=2)
 
 	d11 = model.distributions[0].distributions[0]
@@ -513,8 +508,8 @@
 	assert_array_almost_equal(d23.parameters, [3.5, 0.19999999999999787])
 
 
-@with_setup(setup_multivariate_mixed, teardown)
-def test_nb_multivariate_mixed_fit_parallel():
+def test_nb_multivariate_mixed_fit_parallel(multivariate_mixed):
+	model, X, y, X_nan = multivariate_mixed
 	model.fit(X, y, n_jobs=2)
 
 	d11 = model.distributions[0].distributions[0]
@@ -532,8 +527,8 @@
 	assert_array_almost_equal(d23.parameters, [3.5])
 
 
-@with_setup(setup_univariate_mixed, teardown)
-def test_nb_univariate_mixed_from_samples():
+def test_nb_univariate_mixed_from_samples(univariate_mixed):
+	model, X = univariate_mixed
 	X = np.array([5, 4, 5, 4, 6, 5, 6, 5, 4, 6, 5, 4, 0, 0,
 		1, 9, 8, 2, 0, 1, 1, 8, 10, 0]).reshape(-1, 1)
 	y = np.array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1,
@@ -548,8 +543,8 @@
 	assert_array_almost_equal(d1.parameters, [4.916666666666667, 0.7592027982620252])
 	assert_array_almost_equal(d2.parameters, [0.0, 10.0])
 
-@with_setup(setup_multivariate_gaussian, teardown)
-def test_nb_multivariate_gaussian_from_samples():
+def test_nb_multivariate_gaussian_from_samples(multivariate_gaussian):
+	model, X, y, X_nan = multivariate_gaussian
 	model = NaiveBayes.from_samples(NormalDistribution, X, y)
 
 	d11 = model.distributions[0].distributions[0]
@@ -567,8 +562,8 @@
 	assert_array_almost_equal(d23.parameters, [3.5, 0.19999999999999787])
 
 
-@with_setup(setup_multivariate_mixed, teardown)
-def test_nb_multivariate_mixed_from_samples():
+def test_nb_multivariate_mixed_from_samples(multivariate_mixed):
+	model, X, y, X_nan = multivariate_mixed
 	d = [ExponentialDistribution, LogNormalDistribution, PoissonDistribution]
 	model = NaiveBayes.from_samples(d, X, y)
 
@@ -587,8 +582,8 @@
 	assert_array_almost_equal(d23.parameters, [3.5])
 
 
-@with_setup(setup_multivariate_gaussian, teardown)
-def test_nb_multivariate_gaussian_nan_from_samples():
+def test_nb_multivariate_gaussian_nan_from_samples(multivariate_gaussian):
+	model, X, y, X_nan = multivariate_gaussian
 	model = NaiveBayes.from_samples(NormalDistribution, X_nan, y)
 
 	d11 = model.distributions[0].distributions[0]
@@ -606,8 +601,8 @@
 	assert_array_almost_equal(d23.parameters, [3.3, 0.0])
 
 
-@with_setup(setup_multivariate_mixed, teardown)
-def test_nb_multivariate_mixed_nan_from_samples():
+def test_nb_multivariate_mixed_nan_from_samples(multivariate_mixed):
+	model, X, y, X_nan = multivariate_mixed
 	d = [ExponentialDistribution, LogNormalDistribution, PoissonDistribution]
 	model = NaiveBayes.from_samples(d, X_nan, y)
 
@@ -626,8 +621,8 @@
 	assert_array_almost_equal(d23.parameters, [3.3])
 
 
-@with_setup(setup_univariate_mixed, teardown)
-def test_nb_univariate_mixed_from_samples_parallel():
+def test_nb_univariate_mixed_from_samples_parallel(univariate_mixed):
+	model, X = univariate_mixed
 	X = np.array([5, 4, 5, 4, 6, 5, 6, 5, 4, 6, 5, 4, 0, 0,
 		1, 9, 8, 2, 0, 1, 1, 8, 10, 0]).reshape(-1, 1)
 	y = np.array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1,
@@ -642,8 +637,8 @@
 	assert_array_almost_equal(d1.parameters, [4.916666666666667, 0.7592027982620252])
 	assert_array_almost_equal(d2.parameters, [0.0, 10.0])
 
-@with_setup(setup_multivariate_gaussian, teardown)
-def test_nb_multivariate_gaussian_from_samples_parallel():
+def test_nb_multivariate_gaussian_from_samples_parallel(multivariate_gaussian):
+	model, X, y, X_nan = multivariate_gaussian
 	model = NaiveBayes.from_samples(NormalDistribution, X, y, n_jobs=2)
 
 	d11 = model.distributions[0].distributions[0]
@@ -661,8 +656,8 @@
 	assert_array_almost_equal(d23.parameters, [3.5, 0.19999999999999787])
 
 
-@with_setup(setup_multivariate_mixed, teardown)
-def test_nb_multivariate_mixed_from_samples_parallel():
+def test_nb_multivariate_mixed_from_samples_parallel(multivariate_mixed):
+	model, X, y, X_nan = multivariate_mixed
 	d = [ExponentialDistribution, LogNormalDistribution, PoissonDistribution]
 	model = NaiveBayes.from_samples(d, X, y, n_jobs=2)
 
@@ -681,8 +676,8 @@
 	assert_array_almost_equal(d23.parameters, [3.5])
 
 
-@with_setup(setup_univariate_mixed, teardown)
-def test_raise_errors():
+def test_raise_errors(univariate_mixed):
+	model, X = univariate_mixed
 	# check raises no errors when converting values
 	model.predict_log_proba([[5]])
 	model.predict_log_proba([[4.5]])
@@ -699,38 +694,38 @@
 	model.predict([[5], [6]])
 	model.predict(np.array([[5], [6]]))
 
-@with_setup(setup_univariate_mixed, teardown)
-def test_pickling():
+def test_pickling(univariate_mixed):
+	model, X = univariate_mixed
 	j_univ = pickle.dumps(model)
 
 	new_univ = pickle.loads(j_univ)
-	assert_true(isinstance(new_univ.distributions[0], NormalDistribution))
-	assert_true(isinstance(new_univ.distributions[1], UniformDistribution))
-	assert_true(isinstance(new_univ, NaiveBayes))
+	assert isinstance(new_univ.distributions[0], NormalDistribution)
+	assert isinstance(new_univ.distributions[1], UniformDistribution)
+	assert isinstance(new_univ, NaiveBayes)
 	numpy.testing.assert_array_equal(model.weights, new_univ.weights)
 
-@with_setup(setup_univariate_mixed, teardown)
-def test_json():
+def test_json(univariate_mixed):
+	model, X = univariate_mixed
 	j_univ = model.to_json()
 
 	new_univ = model.from_json(j_univ)
-	assert_true(isinstance(new_univ.distributions[0], NormalDistribution))
-	assert_true(isinstance(new_univ.distributions[1], UniformDistribution))
-	assert_true(isinstance(new_univ, NaiveBayes))
+	assert isinstance(new_univ.distributions[0], NormalDistribution)
+	assert isinstance(new_univ.distributions[1], UniformDistribution)
+	assert isinstance(new_univ, NaiveBayes)
 	numpy.testing.assert_array_equal( model.weights, new_univ.weights)
 
-@with_setup(setup_univariate_mixed, teardown)
-def test_robust_from_json():
+def test_robust_from_json(univariate_mixed):
+	model, X = univariate_mixed
 	j_univ = model.to_json()
 
 	new_univ = from_json(j_univ)
-	assert_true(isinstance(new_univ.distributions[0], NormalDistribution))
-	assert_true(isinstance(new_univ.distributions[1], UniformDistribution))
-	assert_true(isinstance(new_univ, NaiveBayes))
+	assert isinstance(new_univ.distributions[0], NormalDistribution)
+	assert isinstance(new_univ.distributions[1], UniformDistribution)
+	assert isinstance(new_univ, NaiveBayes)
 	numpy.testing.assert_array_equal( model.weights, new_univ.weights)
 
-@with_setup(setup_multivariate_gaussian, teardown)
-def test_io_log_probability():
+def test_io_log_probability(multivariate_gaussian):
+	model, X, y, X_nan = multivariate_gaussian
 	X2 = DataGenerator(X)
 	X3 = DataFrameGenerator(pandas.DataFrame(X))
 	logp1 = model.log_probability(X)
@@ -739,8 +734,8 @@
 	assert_array_almost_equal(logp1, logp2)
 	assert_array_almost_equal(logp1, logp3)
 
-@with_setup(setup_multivariate_gaussian, teardown)
-def test_io_predict():
+def test_io_predict(multivariate_gaussian):
+	model, X, y, X_nan = multivariate_gaussian
 	X2 = DataGenerator(X)
 	X3 = DataFrameGenerator(pandas.DataFrame(X))
 	y_hat1 = model.predict(X)
@@ -749,8 +744,8 @@
 	assert_array_almost_equal(y_hat1, y_hat2)
 	assert_array_almost_equal(y_hat1, y_hat3)
 
-@with_setup(setup_multivariate_gaussian, teardown)
-def test_io_predict_proba():
+def test_io_predict_proba(multivariate_gaussian):
+	model, X, y, X_nan = multivariate_gaussian
 	X2 = DataGenerator(X)
 	X3 = DataFrameGenerator(pandas.DataFrame(X))
 	y_hat1 = model.predict_proba(X)
@@ -759,8 +754,8 @@
 	assert_array_almost_equal(y_hat1, y_hat2)
 	assert_array_almost_equal(y_hat1, y_hat3)
 
-@with_setup(setup_multivariate_gaussian, teardown)
-def test_io_predict_log_proba():
+def test_io_predict_log_proba(multivariate_gaussian):
+	model, X, y, X_nan = multivariate_gaussian
 	X2 = DataGenerator(X)
 	X3 = DataFrameGenerator(pandas.DataFrame(X))
 	y_hat1 = model.predict_log_proba(X)
@@ -831,12 +826,12 @@
 		y,
 	)
 	p_y0 = m.distributions[0].parameters[0]
-	assert_almost_equal(list(p_y0[0].parameters[0].values()), [1/3, 1/3, 1/3])
-	assert_almost_equal(list(p_y0[1].parameters[0].values()), [2/3, 1/3])
+	assert_almost_equal(list(p_y0[0].parameters[0].values()), [1/3, 1/3, 1/3])
+	assert_almost_equal(list(p_y0[1].parameters[0].values()), [2/3, 1/3])
 
 	p_y1 = m.distributions[1].parameters[0]
-	assert_almost_equal(list(p_y1[0].parameters[0].values()), [1/3, 1/3, 1/3])
-	assert_almost_equal(list(p_y1[1].parameters[0].values()), [2/3, 1/3])
+	assert_almost_equal(list(p_y1[0].parameters[0].values()), [1/3, 1/3, 1/3])
+	assert_almost_equal(list(p_y1[1].parameters[0].values()), [2/3, 1/3])
 
 	# Check the probability calculation for a test variable.
 	X_test = np.array([[1, 0]])
--- python-pomegranate.orig/tests/test_profile_hmm.py
+++ python-pomegranate/tests/test_profile_hmm.py
@@ -1,16 +1,15 @@
 from __future__ import  (division, print_function)
 
 from pomegranate import *
-from .tools import with_setup
-from .tools import assert_equal
-from .tools import assert_not_equal
-from .tools import assert_raises
-from .tools import assert_almost_equal
+from numpy.testing import assert_almost_equal
 import random
 import numpy as np
 import json
+import pytest
 
-def setup():
+
+@pytest.fixture
+def model():
 	'''
 	Build a model that we want to use to test sequences. This model will
 	be somewhat complicated, in order to extensively test YAHMM. This will be
@@ -21,7 +20,6 @@
 
 	random.seed(0)
 
-	global model
 	model = HiddenMarkovModel( "Global Alignment")
 
 	# Define the distribution for insertions
@@ -88,9 +86,11 @@
 
 	# Call bake to finalize the structure of the model.
 	model.bake()
+	return model
 
 
-def multitransition_setup():
+@pytest.fixture
+def multitransition():
 	'''
 	Build a model that we want to use to test sequences. This is the same as the
 	above model, except that it uses the multiple transition methods for building.
@@ -143,9 +143,11 @@
 
 	# Call bake to finalize the structure of the model.
 	model.bake()
+	return model
 
 
-def tied_edge_setup():
+@pytest.fixture
+def tied_edge():
 	'''
 	Build a model that we want to use to test sequences. This model has
 	tied edges.
@@ -153,7 +155,6 @@
 
 	random.seed(0)
 
-	global model
 	model = HiddenMarkovModel( "Global Alignment")
 
 	# Define the distribution for insertions
@@ -220,19 +221,10 @@
 
 	# Call bake to finalize the structure of the model.
 	model.bake()
+	return model
 
 
-def teardown():
-	'''
-	Remove the model at the end of the unit testing. Since it is stored in a
-	global variance, simply delete it.
-	'''
-
-	pass
-
-
-@with_setup( setup, teardown )
-def test_same_length_viterbi():
+def test_same_length_viterbi(model):
 	scores = [ -0.5132449003570658, -11.048101241343396, -9.125519674022627,
 		-5.0879558788604475 ]
 	sequences = [ list(x) for x in [ 'ACT', 'GGC', 'GAT', 'ACC' ] ]
@@ -240,11 +232,11 @@
 	for seq, score in zip( sequences, scores ):
 		assert_almost_equal( model.viterbi( seq )[0], score )
 
-	assert_raises( ValueError, model.viterbi, list('XXX') )
+	with pytest.raises( ValueError ):
+		model.viterbi( list('XXX') )
 
 
-@with_setup( setup, teardown )
-def test_variable_length_viterbi():
+def test_variable_length_viterbi(model):
 	scores = [ -5.406181012423981, -10.88681993576597, -3.6244718790494277,
 	-3.644880750680635, -10.674332964640293, -10.393824835172445,
 	-8.67126440174503, -16.903451796110275, -16.451699654050792 ]
@@ -255,17 +247,15 @@
 		assert_almost_equal( model.viterbi( seq )[0], score )
 
 
-@with_setup( setup, teardown )
-def test_log_probability():
+def test_log_probability(model):
 	scores = [ -5.3931, -0.5052, -11.8478, -14.3482 ]
 	sequences = [ list(x) for x in ( 'A', 'ACT', 'GGCA', 'TACCTGT' ) ]
 
 	for seq, score in zip( sequences, scores ):
-		assert_equal( round( model.log_probability( seq ), 4 ), score )
+		assert round( model.log_probability( seq ), 4 ) == score
 
 
-@with_setup( setup, teardown )
-def test_posterior_transitions():
+def test_posterior_transitions(model):
 	a_scores = [ 0.0, 0.0021, 0.2017, 1.5105 ]
 	b_scores = [ 0.013, 0.0036, 1.9836, 2.145 ]
 	c_scores = [ 0.013, 0.0035, 0.817, 0.477 ]
@@ -280,15 +270,14 @@
 	for seq, a, b, c, d, t in scores:
 		trans, ems = model.forward_backward( seq )
 
-		assert_equal( round( trans[i].sum(), 4 ), a )
-		assert_equal( round( trans[j].sum(), 4 ), b )
-		assert_equal( round( trans[k].sum(), 4 ), c )
-		assert_equal( round( trans[l].sum(), 4 ), d )
-		assert_equal( round( trans.sum(), 4 ), t )
+		assert round( trans[i].sum(), 4 ) == a
+		assert round( trans[j].sum(), 4 ) == b
+		assert round( trans[k].sum(), 4 ) == c
+		assert round( trans[l].sum(), 4 ) == d
+		assert round( trans.sum(), 4 ) == t
 
 
-@with_setup( setup, teardown )
-def test_posterior_transitions_w_training():
+def test_posterior_transitions_w_training(model):
 	sequences = [ list(x) for x in ( 'A', 'ACT', 'GGCA', 'TACCTGT' ) ]
 	indices = { state.name: i for i, state in enumerate( model.states ) }
 
@@ -297,26 +286,25 @@
 	d1, d2, d3 = indices['D1'], indices['D2'], indices['D3']
 	m1, m2, m3 = indices['M1'], indices['M2'], indices['M3']
 
-	assert_equal( transitions[d1, i1], transitions[d2, i2] )
-	assert_equal( transitions[i0, i0], transitions[i1, i1] )
-	assert_equal( transitions[i0, i0], transitions[i2, i2] )
-	assert_equal( transitions[i0, m1], transitions[i1, m2] )
-	assert_equal( transitions[d1, d2], transitions[d2, d3] )
-	assert_equal( transitions[i0, d1], transitions[i1, d2] )
-	assert_equal( transitions[i0, d1], transitions[i2, d3] )
+	assert transitions[d1, i1] == transitions[d2, i2]
+	assert transitions[i0, i0] == transitions[i1, i1]
+	assert transitions[i0, i0] == transitions[i2, i2]
+	assert transitions[i0, m1] == transitions[i1, m2]
+	assert transitions[d1, d2] == transitions[d2, d3]
+	assert transitions[i0, d1] == transitions[i1, d2]
+	assert transitions[i0, d1] == transitions[i2, d3]
 
 	model.fit( sequences, verbose=False )
 	transitions = model.dense_transition_matrix()
 
-	assert_not_equal( transitions[d1, i1], transitions[d2, i2] )
-	assert_not_equal( transitions[i0, m1], transitions[i1, m2] )
-	assert_not_equal( transitions[d1, d2], transitions[d2, d3] )
-	assert_not_equal( transitions[i0, d1], transitions[i1, d2] )
-	assert_not_equal( transitions[i0, d1], transitions[i2, d3] )
+	assert transitions[d1, i1] != transitions[d2, i2]
+	assert transitions[i0, m1] != transitions[i1, m2]
+	assert transitions[d1, d2] != transitions[d2, d3]
+	assert transitions[i0, d1] != transitions[i1, d2]
+	assert transitions[i0, d1] != transitions[i2, d3]
 
 
-@with_setup( setup, teardown )
-def test_posterior_transitions_w_vtraining():
+def test_posterior_transitions_w_vtraining(model):
 	sequences = [ list(x) for x in ( 'A', 'ACT', 'GGCA', 'TACCTGT' ) ]
 	indices = { state.name: i for i, state in enumerate( model.states ) }
 
@@ -325,25 +313,25 @@
 	d1, d2, d3 = indices['D1'], indices['D2'], indices['D3']
 	m1, m2, m3 = indices['M1'], indices['M2'], indices['M3']
 
-	assert_equal( transitions[d1, i1], transitions[d2, i2] )
-	assert_equal( transitions[i0, i0], transitions[i1, i1] )
-	assert_equal( transitions[i0, i0], transitions[i2, i2] )
-	assert_equal( transitions[i0, m1], transitions[i1, m2] )
-	assert_equal( transitions[d1, d2], transitions[d2, d3] )
-	assert_equal( transitions[i0, d1], transitions[i1, d2] )
-	assert_equal( transitions[i0, d1], transitions[i2, d3] )
+	assert transitions[d1, i1] == transitions[d2, i2]
+	assert transitions[i0, i0] == transitions[i1, i1]
+	assert transitions[i0, i0] == transitions[i2, i2]
+	assert transitions[i0, m1] == transitions[i1, m2]
+	assert transitions[d1, d2] == transitions[d2, d3]
+	assert transitions[i0, d1] == transitions[i1, d2]
+	assert transitions[i0, d1] == transitions[i2, d3]
 
 	model.fit( sequences, verbose=False, algorithm='viterbi' )
 	transitions = model.dense_transition_matrix()
 
-	assert_not_equal( transitions[i0, i0], transitions[i1, i1] )
-	assert_not_equal( transitions[d1, d2], transitions[d2, d3] )
-	assert_not_equal( transitions[i0, d1], transitions[i1, d2] )
-	assert_not_equal( transitions[i0, d1], transitions[i2, d3] )
+	assert transitions[i0, i0] != transitions[i1, i1]
+	assert transitions[d1, d2] != transitions[d2, d3]
+	assert transitions[i0, d1] != transitions[i1, d2]
+	assert transitions[i0, d1] != transitions[i2, d3]
 
 
-@with_setup( tied_edge_setup, teardown )
-def test_posterior_transitions_w_tied_training():
+def test_posterior_transitions_w_tied_training(tied_edge):
+	model = tied_edge
 	sequences = [ list(x) for x in ( 'A', 'ACT', 'GGCA', 'TACCTGT' ) ]
 	indices = { state.name: i for i, state in enumerate( model.states ) }
 
@@ -352,25 +340,25 @@
 	d1, d2, d3 = indices['D1'], indices['D2'], indices['D3']
 	m1, m2, m3 = indices['M1'], indices['M2'], indices['M3']
 
-	assert_equal( transitions[d1, i1], transitions[d2, i2] )
-	assert_equal( transitions[i0, i0], transitions[i1, i1] )
-	assert_equal( transitions[i0, i0], transitions[i2, i2] )
-	assert_equal( transitions[i0, m1], transitions[i1, m2] )
-	assert_equal( transitions[d1, d2], transitions[d2, d3] )
-	assert_equal( transitions[i0, d1], transitions[i1, d2] )
-	assert_equal( transitions[i0, d1], transitions[i2, d3] )
+	assert transitions[d1, i1] == transitions[d2, i2]
+	assert transitions[i0, i0] == transitions[i1, i1]
+	assert transitions[i0, i0] == transitions[i2, i2]
+	assert transitions[i0, m1] == transitions[i1, m2]
+	assert transitions[d1, d2] == transitions[d2, d3]
+	assert transitions[i0, d1] == transitions[i1, d2]
+	assert transitions[i0, d1] == transitions[i2, d3]
 
 	model.fit( sequences, verbose=False )
 	transitions = model.dense_transition_matrix()
 
-	assert_equal( transitions[i0, i0], transitions[i1, i1] )
-	assert_equal( transitions[d1, d2], transitions[d2, d3] )
-	assert_equal( transitions[i0, d1], transitions[i1, d2] )
-	assert_equal( transitions[i0, d1], transitions[i2, d3] )
+	assert transitions[i0, i0] == transitions[i1, i1]
+	assert transitions[d1, d2] == transitions[d2, d3]
+	assert transitions[i0, d1] == transitions[i1, d2]
+	assert transitions[i0, d1] == transitions[i2, d3]
 
 
-@with_setup( tied_edge_setup, teardown )
-def test_posterior_transitions_w_tied_vtraining():
+def test_posterior_transitions_w_tied_vtraining(tied_edge):
+	model = tied_edge
 	sequences = [ list(x) for x in ( 'A', 'ACT', 'GGCA', 'TACCTGT' ) ]
 	indices = { state.name: i for i, state in enumerate( model.states ) }
 
@@ -379,28 +367,27 @@
 	d1, d2, d3 = indices['D1'], indices['D2'], indices['D3']
 	m1, m2, m3 = indices['M1'], indices['M2'], indices['M3']
 
-	assert_equal( transitions[d1, i1], transitions[d2, i2] )
-	assert_equal( transitions[i0, i0], transitions[i1, i1] )
-	assert_equal( transitions[i0, i0], transitions[i2, i2] )
-	assert_equal( transitions[i0, m1], transitions[i1, m2] )
-	assert_equal( transitions[d1, d2], transitions[d2, d3] )
-	assert_equal( transitions[i0, d1], transitions[i1, d2] )
-	assert_equal( transitions[i0, d1], transitions[i2, d3] )
+	assert transitions[d1, i1] == transitions[d2, i2]
+	assert transitions[i0, i0] == transitions[i1, i1]
+	assert transitions[i0, i0] == transitions[i2, i2]
+	assert transitions[i0, m1] == transitions[i1, m2]
+	assert transitions[d1, d2] == transitions[d2, d3]
+	assert transitions[i0, d1] == transitions[i1, d2]
+	assert transitions[i0, d1] == transitions[i2, d3]
 
 	model.fit( sequences, verbose=False, algorithm='viterbi' )
 	transitions = model.dense_transition_matrix()
 
-	assert_equal( transitions[d1, i1], transitions[d2, i2] )
-	assert_equal( transitions[i0, i0], transitions[i1, i1] )
-	assert_equal( transitions[i0, i0], transitions[i2, i2] )
-	assert_equal( transitions[i0, m1], transitions[i1, m2] )
-	assert_equal( transitions[d1, d2], transitions[d2, d3] )
-	assert_equal( transitions[i0, d1], transitions[i1, d2] )
-	assert_equal( transitions[i0, d1], transitions[i2, d3] )
+	assert transitions[d1, i1] == transitions[d2, i2]
+	assert transitions[i0, i0] == transitions[i1, i1]
+	assert transitions[i0, i0] == transitions[i2, i2]
+	assert transitions[i0, m1] == transitions[i1, m2]
+	assert transitions[d1, d2] == transitions[d2, d3]
+	assert transitions[i0, d1] == transitions[i1, d2]
+	assert transitions[i0, d1] == transitions[i2, d3]
 
 
-@with_setup( setup, teardown )
-def test_posterior_emissions():
+def test_posterior_emissions(model):
 	a_scores = [ 0.987, 0.9965, 0.183, 0.523 ]
 	b_scores = [ 0.0, 0.9977, 0.7364, 0.6318 ]
 	c_scores = [ 0.0, 0.9975, 0.6237, 0.8641 ]
@@ -414,15 +401,15 @@
 		trans, ems = model.forward_backward( seq )
 		ems = np.exp( ems )
 
-		assert_equal( round( ems[:,i].sum(), 4 ), a )
-		assert_equal( round( ems[:,j].sum(), 4 ), b )
-		assert_equal( round( ems[:,k].sum(), 4 ), c )
-		assert_equal( round( ems[:,l].sum(), 4 ), d )
-		assert_equal( round( ems.sum() ), len( seq ) )
+		assert round( ems[:,i].sum(), 4 ) == a
+		assert round( ems[:,j].sum(), 4 ) == b
+		assert round( ems[:,k].sum(), 4 ) == c
+		assert round( ems[:,l].sum(), 4 ) == d
+		assert round( ems.sum() ) == len( seq )
 
 
-@with_setup( multitransition_setup, teardown )
-def test_posterior_emissions_w_multitransition_setup():
+def test_posterior_emissions_w_multitransition_setup(multitransition):
+	model = multitransition
 	a_scores = [ 0.987, 0.9965, 0.183, 0.523 ]
 	b_scores = [ 0.0, 0.9977, 0.7364, 0.6318 ]
 	c_scores = [ 0.0, 0.9975, 0.6237, 0.8641 ]
@@ -436,15 +423,15 @@
 		trans, ems = model.forward_backward( seq )
 		ems = np.exp( ems )
 
-		assert_equal( round( ems[:,i].sum(), 4 ), a )
-		assert_equal( round( ems[:,j].sum(), 4 ), b )
-		assert_equal( round( ems[:,k].sum(), 4 ), c )
-		assert_equal( round( ems[:,l].sum(), 4 ), d )
-		assert_equal( round( ems.sum() ), len( seq ) )
+		assert round( ems[:,i].sum(), 4 ) == a
+		assert round( ems[:,j].sum(), 4 ) == b
+		assert round( ems[:,k].sum(), 4 ) == c
+		assert round( ems[:,l].sum(), 4 ) == d
+		assert round( ems.sum() ) == len( seq )
 
 
-@with_setup( tied_edge_setup, teardown )
-def test_posterior_emissions_w_tied_edge_setup():
+def test_posterior_emissions_w_tied_edge_setup(tied_edge):
+	model = tied_edge
 	a_scores = [ 0.987, 0.9965, 0.183, 0.523 ]
 	b_scores = [ 0.0, 0.9977, 0.7364, 0.6318 ]
 	c_scores = [ 0.0, 0.9975, 0.6237, 0.8641 ]
@@ -458,34 +445,31 @@
 		trans, ems = model.forward_backward( seq )
 		ems = np.exp( ems )
 
-		assert_equal( round( ems[:,i].sum(), 4 ), a )
-		assert_equal( round( ems[:,j].sum(), 4 ), b )
-		assert_equal( round( ems[:,k].sum(), 4 ), c )
-		assert_equal( round( ems[:,l].sum(), 4 ), d )
-		assert_equal( round( ems.sum() ), len( seq ) )
-
-
-@with_setup( setup, teardown )
-def test_properties():
-	assert_equal( model.edge_count(), 29 )
-	assert_equal( model.state_count(), 12 )
-	assert_equal( model.name, "Global Alignment" )
+		assert round( ems[:,i].sum(), 4 ) == a
+		assert round( ems[:,j].sum(), 4 ) == b
+		assert round( ems[:,k].sum(), 4 ) == c
+		assert round( ems[:,l].sum(), 4 ) == d
+		assert round( ems.sum() ) == len( seq )
+
+
+def test_properties(model):
+	assert model.edge_count() == 29
+	assert model.state_count() == 12
+	assert model.name == "Global Alignment"
 
 
-@with_setup( setup, teardown )
-def test_to_json():
+def test_to_json(model):
 	b = json.loads(model.to_json())
 
-	assert_equal(b['name'], 'Global Alignment')
-	assert_equal(len(b['edges']), 29)
-	assert_equal(len(b['states']), 12)
-	assert_equal(b['silent_index'], 7)
+	assert b['name'] == 'Global Alignment'
+	assert len(b['edges']) == 29
+	assert len(b['states']) == 12
+	assert b['silent_index'] == 7
 
 
-@with_setup( setup, teardown )
-def test_from_json():
+def test_from_json(model):
 	hmm = HiddenMarkovModel.from_json( model.to_json() )
 
-	assert_equal(hmm.edge_count(), 29)
-	assert_equal(hmm.state_count(), 12)
-	assert_equal(hmm.name, "Global Alignment")
+	assert hmm.edge_count() == 29
+	assert hmm.state_count() == 12
+	assert hmm.name == "Global Alignment"
--- python-pomegranate.orig/setup.py
+++ python-pomegranate/setup.py
@@ -100,7 +100,6 @@
         "Plotting": ["pygraphviz", "matplotlib"],
         "GPU": ["cupy"],
     },
-    test_suite = 'nose.collector',
     package_data={
         'pomegranate': ['*.pyd', '*.pxd'],
         'pomegranate/distributions': ['*.pyd', '*.pxd'],
--- /dev/null
+++ python-pomegranate/tests/assert_tools.py
@@ -0,0 +1,141 @@
+"""
+Copyright 2016 Oliver Schoenborn. BSD 3-Clause license (see __license__ at bottom of this file for details).
+
+This module is part of the nose2pytest distribution.
+
+This module's assert_ functions provide drop-in replacements for nose.tools.assert_ functions (many of which are
+pep-8-ized extractions from Python's unittest.case.TestCase methods). As such, it can be imported in a test
+suite run by pytest, to replace the nose imports with functions that rely on pytest's assertion
+introspection for error reporting.  When combined with running nose2pytest.py on your test suite, this
+module may be sufficient to decrease your test suite's third-party dependencies by 1.
+"""
+
+import pytest
+import unittest
+
+
+__all__ = [
+    'assert_almost_equal',
+    'assert_not_almost_equal',
+    'assert_dict_contains_subset',
+
+    'assert_raises_regex',
+    'assert_raises_regexp',
+    'assert_regexp_matches',
+    'assert_warns_regex',
+]
+
+
+def assert_almost_equal(a, b, places=7, msg=None):
+    """
+    Fail if the two objects are unequal as determined by their
+    difference rounded to the given number of decimal places
+    and comparing to zero.
+
+    Note that decimal places (from zero) are usually not the same
+    as significant digits (measured from the most significant digit).
+
+    See the builtin round() function for places parameter.
+    """
+    if msg is None:
+        assert round(abs(b - a), places) == 0
+    else:
+        assert round(abs(b - a), places) == 0, msg
+
+
+def assert_not_almost_equal(a, b, places=7, msg=None):
+    """
+    Fail if the two objects are equal as determined by their
+    difference rounded to the given number of decimal places
+    and comparing to zero.
+
+    Note that decimal places (from zero) are usually not the same
+    as significant digits (measured from the most significant digit).
+
+    See the builtin round() function for places parameter.
+    """
+    if msg is None:
+        assert round(abs(b - a), places) != 0
+    else:
+        assert round(abs(b - a), places) != 0, msg
+
+
+def assert_dict_contains_subset(subset, dictionary, msg=None):
+    """
+    Checks whether dictionary is a superset of subset. If not, the assertion message will have useful details,
+    unless msg is given, then msg is output.
+    """
+    dictionary = dictionary
+    missing_keys = sorted(list(set(subset.keys()) - set(dictionary.keys())))
+    mismatch_vals = {k: (subset[k], dictionary[k]) for k in subset if k in dictionary and subset[k] != dictionary[k]}
+    if msg is None:
+        assert missing_keys == [], 'Missing keys = {}'.format(missing_keys)
+        assert mismatch_vals == {}, 'Mismatched values (s, d) = {}'.format(mismatch_vals)
+    else:
+        assert missing_keys == [], msg
+        assert mismatch_vals == {}, msg
+
+
+# make other unittest.TestCase methods available as-is as functions; trick taken from Nose
+
+class _Dummy(unittest.TestCase):
+    def do_nothing(self):
+        pass
+
+_t = _Dummy('do_nothing')
+
+assert_raises_regex = _t.assertRaisesRegex
+assert_raises_regexp = _t.assertRaisesRegex
+assert_regexp_matches = _t.assertRegex
+assert_warns_regex = _t.assertWarnsRegex
+
+del _Dummy
+del _t
+
+
+# pytest integration: add all assert_ function to the pytest package namespace
+
+# Use similar trick as Nose to bring in bound methods from unittest.TestCase as free functions:
+
+
+def _supported_nose_name(name):
+    return name.startswith('assert_') or name in ('ok_', 'eq_')
+
+
+def pytest_configure():
+    for name, obj in globals().items():
+        if _supported_nose_name(name):
+            setattr(pytest, name, obj)
+
+
+# licensing
+
+__license__ = """
+    Copyright (c) 2016, Oliver Schoenborn
+    All rights reserved.
+
+    Redistribution and use in source and binary forms, with or without
+    modification, are permitted provided that the following conditions are met:
+
+    * Redistributions of source code must retain the above copyright notice, this
+      list of conditions and the following disclaimer.
+
+    * Redistributions in binary form must reproduce the above copyright notice,
+      this list of conditions and the following disclaimer in the documentation
+      and/or other materials provided with the distribution.
+
+    * Neither the name of nose2pytest nor the names of its
+      contributors may be used to endorse or promote products derived from
+      this software without specific prior written permission.
+
+    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+    AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+    IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+    DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+    FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+    DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+    SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+    CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+    OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+"""
--- python-pomegranate.orig/tests/test_markov_chain.py
+++ python-pomegranate/tests/test_markov_chain.py
@@ -1,369 +1,372 @@
 from __future__ import (division)
 
 from pomegranate import *
-from .tools import with_setup
-from .tools import assert_almost_equal
-from .tools import assert_equal
-from .tools import assert_not_equal
-from .tools import assert_less_equal
-from .tools import assert_raises
-import random
+
+from .assert_tools import assert_almost_equal
+
 import pickle
-import numpy as np
+import pytest
 
+
+@pytest.fixture
 def setup():
-	global data
-	global weights
-	global zeroth_dist
-	global first_dist
-	global second_dist
-
-	data = [ list('AAACBDAA'), list('DBBCBABD'), list('CBDCCDCBC'),	 list('DDDDC'),
-			 list('DADBBCBBBACBC'), list('CBDBAC'), list('CACDDDBCAABA'), list('BBABDAADCABCCAD'),
-			 list('DCBBBADBBCBC'), list('DCADDAAA'), list('AABBBCC'), list('CABBADACDBBDCC'),
-			 list('AADDCDDDB'), list('CCDBDCDDDABDAC'), list('DDADB'), list('BCDACDBBBBAC'),
-			 list('ADCBCCCB'), list('BDDBCA'), list('ACCDCBADCBBBB'), list('ACCBABCBA'),
-			 list('DAABCCBBD'), list('CCAADACBDCDBABC'), list('BCDCACBBAD'), list('CBBBCDCA'),
-			 list('CBDADACAADBCC'), list('DCDABCC'), list('AAAABCBC'), list('BDDDDACBBCABA'),
-			 list('CCBDCBDCBADBBDB'), list('CBDAA'), list('CCDAAAB'), list('ABDBAC'),
-			 list('DBCCDCC'), list('CCDCBB'), list('ACDBCDD'), list('CACCACCDBBDABB'),
-			 list('DCDDAB'), list('ADDDCDACACA'), list('AAACACADB'), list('BDBBDDBACDCCAAA') ]
-
-	weights = [ 8, 7, 1, 2, 8, 1, 9, 2, 6, 2, 5, 6, 10, 10, 8, 4, 10, 10, 2, 9,
-				6, 2, 9, 9, 3, 1, 10, 6, 9, 1, 2, 9, 2, 1, 4, 1, 2, 7, 9, 4]
-
-	zeroth_dist = DiscreteDistribution( { 'A': 0.1, 'B': 0.2, 'C': 0.3, 'D': 0.4 } )
-
-	first_dist = ConditionalProbabilityTable(
-		[[ 'A', 'A', 0.8 ], [ 'A', 'B', 0.05 ], [ 'A', 'C', 0.05 ], [ 'A', 'D', 0.1 ],
-		 [ 'B', 'A', 0.1 ], [ 'B', 'B', 0.2 ], [ 'B', 'C', 0.6 ], [ 'B', 'D', 0.1 ],
-		 [ 'C', 'A', 0.15 ], [ 'C', 'B', 0.1 ], [ 'C', 'C', 0.25 ], [ 'C', 'D', 0.5 ],
-		 [ 'D', 'A', 0.25 ], [ 'D', 'B', 0.25 ], [ 'D', 'C', 0.4 ], [ 'D', 'D', 0.1 ]],
-		 [ zeroth_dist ] )
-
-	second_dist = ConditionalProbabilityTable(
-		[[ 'A', 'A', 'A', 0.05 ], [ 'A', 'A', 'B', 0.25 ], [ 'A', 'A', 'C', 0.15 ], [ 'A', 'A', 'D', 0.55 ],
-		 [ 'A', 'B', 'A', 0.05 ], [ 'A', 'B', 'B', 0.05 ], [ 'A', 'B', 'C', 0.85 ], [ 'A', 'B', 'D', 0.05 ],
-		 [ 'A', 'C', 'A', 0.7 ], [ 'A', 'C', 'B', 0.1 ], [ 'A', 'C', 'C', 0.1 ], [ 'A', 'C', 'D', 0.1 ],
-		 [ 'A', 'D', 'A', 0.2 ], [ 'A', 'D', 'B', 0.4 ], [ 'A', 'D', 'C', 0.35 ], [ 'A', 'D', 'D', 0.05 ],
-		 [ 'B', 'A', 'A', 0.3 ], [ 'B', 'A', 'B', 0.05 ], [ 'B', 'A', 'C', 0.15 ], [ 'B', 'A', 'D', 0.5 ],
-		 [ 'B', 'B', 'A', 0.8 ], [ 'B', 'B', 'B', 0.1 ], [ 'B', 'B', 'C', 0.1 ], [ 'B', 'B', 'D', 0.0 ],
-		 [ 'B', 'C', 'A', 0.1 ], [ 'B', 'C', 'B', 0.35 ], [ 'B', 'C', 'C', 0.3 ], [ 'B', 'C', 'D', 0.25 ],
-		 [ 'B', 'D', 'A', 0.3 ], [ 'B', 'D', 'B', 0.1 ], [ 'B', 'D', 'C', 0.2 ], [ 'B', 'D', 'D', 0.4 ],
-		 [ 'C', 'A', 'A', 0.2 ], [ 'C', 'A', 'B', 0.3 ], [ 'C', 'A', 'C', 0.3 ], [ 'C', 'A', 'D', 0.2 ],
-		 [ 'C', 'B', 'A', 0.35 ], [ 'C', 'B', 'B', 0.45 ], [ 'C', 'B', 'C', 0.0 ], [ 'C', 'B', 'D', 0.2 ],
-		 [ 'C', 'C', 'A', 0.25 ], [ 'C', 'C', 'B', 0.0 ], [ 'C', 'C', 'C', 0.6 ], [ 'C', 'C', 'D', 0.15 ],
-		 [ 'C', 'D', 'A', 0.8 ], [ 'C', 'D', 'B', 0.1 ], [ 'C', 'D', 'C', 0.05 ], [ 'C', 'D', 'D', 0.05 ],
-		 [ 'D', 'A', 'A', 0.5 ], [ 'D', 'A', 'B', 0.0 ], [ 'D', 'A', 'C', 0.5 ], [ 'D', 'A', 'D', 0.0 ],
-		 [ 'D', 'B', 'A', 0.35 ], [ 'D', 'B', 'B', 0.5 ], [ 'D', 'B', 'C', 0.1 ], [ 'D', 'B', 'D', 0.05 ],
-		 [ 'D', 'C', 'A', 0.1 ], [ 'D', 'C', 'B', 0.45 ], [ 'D', 'C', 'C', 0.0 ], [ 'D', 'C', 'D', 0.45 ],
-		 [ 'D', 'D', 'A', 0.2 ], [ 'D', 'D', 'B', 0.1 ], [ 'D', 'D', 'C', 0.1 ], [ 'D', 'D', 'D', 0.6 ]],
-		 [ zeroth_dist, first_dist ] )
-
-def teardown():
-	pass
-
-@with_setup( setup, teardown )
-def test_zeroth_dist():
-	assert_almost_equal( zeroth_dist.log_probability( 'A' ), -2.3025850929940455 )
-	assert_almost_equal( zeroth_dist.log_probability( 'B' ), -1.6094379124341003 )
-	assert_almost_equal( zeroth_dist.log_probability( 'C' ), -1.2039728043259361 )
-	assert_almost_equal( zeroth_dist.log_probability( 'D' ), -0.916290731874155 )
-	assert_almost_equal( zeroth_dist.log_probability( 'E' ), -float('inf') )
-
-@with_setup( setup, teardown )
-def test_first_dist():
-	assert_almost_equal( first_dist.log_probability( ('A', 'A') ), -0.22314355131420971 )
-	assert_almost_equal( first_dist.log_probability( ('A', 'B') ), -2.9957322735539909 )
-
-	assert_almost_equal( first_dist.log_probability( ('B', 'B') ), -1.6094379124341003 )
-	assert_almost_equal( first_dist.log_probability( ('B', 'C') ), -0.51082562376599072 )
-
-	assert_almost_equal( first_dist.log_probability( ('C', 'C') ), -1.3862943611198906 )
-	assert_almost_equal( first_dist.log_probability( ('C', 'D') ), -0.69314718055994529 )
-
-	assert_almost_equal( first_dist.log_probability( ('D', 'D') ), -2.3025850929940455 )
-	assert_almost_equal( first_dist.log_probability( ('D', 'A') ), -1.3862943611198906 )
-
-@with_setup( setup, teardown )
-def test_second_dist():
-	assert_almost_equal( second_dist.log_probability( ('A', 'A', 'C') ), -1.89711998489 )
-	assert_almost_equal( second_dist.log_probability( ('A', 'A', 'A') ), -2.99573227355 )
-	assert_almost_equal( second_dist.log_probability( ('A', 'B', 'A') ), -2.99573227355 )
-	assert_almost_equal( second_dist.log_probability( ('A', 'B', 'C') ), -0.162518929498 )
-
-	assert_almost_equal( second_dist.log_probability( ('A', 'C', 'C') ), -2.30258509299 )
-	assert_almost_equal( second_dist.log_probability( ('A', 'C', 'D') ), -2.30258509299 )
-	assert_almost_equal( second_dist.log_probability( ('A', 'D', 'B') ), -0.916290731874 )
-	assert_almost_equal( second_dist.log_probability( ('A', 'D', 'D') ), -2.99573227355 )
-
-	assert_almost_equal( second_dist.log_probability( ('B', 'A', 'B') ), -2.99573227355 )
-	assert_almost_equal( second_dist.log_probability( ('B', 'A', 'D') ), -0.69314718056 )
-	assert_almost_equal( second_dist.log_probability( ('B', 'B', 'B') ), -2.30258509299 )
-	assert_almost_equal( second_dist.log_probability( ('B', 'B', 'D') ), -float('inf') )
-
-	assert_almost_equal( second_dist.log_probability( ('B', 'C', 'A') ), -2.30258509299 )
-	assert_almost_equal( second_dist.log_probability( ('B', 'C', 'B') ), -1.0498221245 )
-	assert_almost_equal( second_dist.log_probability( ('B', 'D', 'D') ), -0.916290731874 )
-	assert_almost_equal( second_dist.log_probability( ('B', 'D', 'B') ), -2.30258509299 )
-
-	assert_almost_equal( second_dist.log_probability( ('C', 'A', 'A') ), -1.60943791243 )
-	assert_almost_equal( second_dist.log_probability( ('C', 'A', 'B') ), -1.20397280433 )
-	assert_almost_equal( second_dist.log_probability( ('C', 'B', 'C') ), -float('inf') )
-	assert_almost_equal( second_dist.log_probability( ('C', 'B', 'A') ), -1.0498221245 )
-
-	assert_almost_equal( second_dist.log_probability( ('C', 'C', 'D') ), -1.89711998489 )
-	assert_almost_equal( second_dist.log_probability( ('C', 'C', 'B') ), -float('inf') )
-	assert_almost_equal( second_dist.log_probability( ('C', 'D', 'A') ), -0.223143551314 )
-	assert_almost_equal( second_dist.log_probability( ('C', 'D', 'C') ), -2.99573227355 )
-
-	assert_almost_equal( second_dist.log_probability( ('D', 'A', 'D') ), -float('inf') )
-	assert_almost_equal( second_dist.log_probability( ('D', 'A', 'A') ), -0.69314718056 )
-	assert_almost_equal( second_dist.log_probability( ('D', 'B', 'D') ), -2.99573227355 )
-	assert_almost_equal( second_dist.log_probability( ('D', 'B', 'C') ), -2.30258509299 )
-
-	assert_almost_equal( second_dist.log_probability( ('D', 'C', 'A') ), -2.30258509299 )
-	assert_almost_equal( second_dist.log_probability( ('D', 'C', 'D') ), -0.798507696218 )
-	assert_almost_equal( second_dist.log_probability( ('D', 'D', 'D') ), -0.510825623766 )
-	assert_almost_equal( second_dist.log_probability( ('D', 'D', 'A') ), -1.60943791243 )
-
-@with_setup( setup, teardown )
-def test_constructors():
-	# raises no errors
-	first_chain = MarkovChain([ zeroth_dist, first_dist ])
-	second_chain = MarkovChain([ zeroth_dist, first_dist, second_dist ])
-
-@with_setup( setup, teardown )
-def test_first_log_probability():
-	# test going one state back
-	first_chain = MarkovChain([ zeroth_dist, first_dist ])
-
-	assert_almost_equal( first_chain.log_probability( list('A') ), -2.3025850929940455 )
-	assert_almost_equal( first_chain.log_probability( list('B') ), -1.6094379124341003 )
-
-	assert_almost_equal( first_chain.log_probability( list('AC') ), -5.2983173665480363 )
-	assert_almost_equal( first_chain.log_probability( list('AD') ), -4.6051701859880909 )
-	assert_almost_equal( first_chain.log_probability( list('BD') ), -3.9120230054281455 )
-	assert_almost_equal( first_chain.log_probability( list('BA') ), -3.9120230054281455 )
-	assert_almost_equal( first_chain.log_probability( list('CA') ), -3.1010927892118172 )
-	assert_almost_equal( first_chain.log_probability( list('CB') ), -3.5065578973199818 )
-	assert_almost_equal( first_chain.log_probability( list('DB') ), -2.3025850929940455 )
-	assert_almost_equal( first_chain.log_probability( list('DC') ), -1.83258146374831 )
-
-	assert_almost_equal( first_chain.log_probability( list('ABDD') ), -9.9034875525361272 )
-	assert_almost_equal( first_chain.log_probability( list('CCCB') ), -6.2791466195597625 )
-	assert_almost_equal( first_chain.log_probability( list('CCBD') ), -7.1954373514339167 )
-	assert_almost_equal( first_chain.log_probability( list('ACAC') ), -10.191169624987909 )
-
-	assert_almost_equal( first_chain.log_probability( list('ABDBCCDC') ), -12.493754717981954 )
-	assert_almost_equal( first_chain.log_probability( list('DACCBDCB') ), -14.508657738524217 )
-
-	assert_almost_equal( first_chain.log_probability( list('BCCCACBDBDBABACD') ), -30.75583043175768 )
-
-	assert_almost_equal( first_chain.log_probability( list('DABBCBDACAAADCBDCDBCBDCACBDABBAA') ), -53.616465272807112 )
-
-	# test going two states back
-	second_chain = MarkovChain([ zeroth_dist, first_dist, second_dist ])
-
-	assert_almost_equal( second_chain.log_probability( list('A') ), -2.30258509299 )
-	assert_almost_equal( second_chain.log_probability( list('B') ), -1.60943791243 )
-
-	assert_almost_equal( second_chain.log_probability( list('AC') ), -5.29831736655 )
-	assert_almost_equal( second_chain.log_probability( list('AD') ), -4.60517018599 )
-	assert_almost_equal( second_chain.log_probability( list('BD') ), -3.91202300543 )
-	assert_almost_equal( second_chain.log_probability( list('BA') ), -3.91202300543 )
-	assert_almost_equal( second_chain.log_probability( list('CA') ), -3.10109278921 )
-	assert_almost_equal( second_chain.log_probability( list('CB') ), -3.50655789732 )
-	assert_almost_equal( second_chain.log_probability( list('DB') ), -2.30258509299 )
-	assert_almost_equal( second_chain.log_probability( list('DC') ), -1.83258146375 )
-
-	assert_almost_equal( second_chain.log_probability( list('ABDD') ), -9.21034037198 )
-	assert_almost_equal( second_chain.log_probability( list('CCCD') ), -4.9982127741 )
-	assert_almost_equal( second_chain.log_probability( list('CCBD') ), -float('inf') )
-	assert_almost_equal( second_chain.log_probability( list('ACAC') ), -6.85896511481 )
+    data = [ list('AAACBDAA'), list('DBBCBABD'), list('CBDCCDCBC'),	 list('DDDDC'),
+    		 list('DADBBCBBBACBC'), list('CBDBAC'), list('CACDDDBCAABA'), list('BBABDAADCABCCAD'),
+    		 list('DCBBBADBBCBC'), list('DCADDAAA'), list('AABBBCC'), list('CABBADACDBBDCC'),
+    		 list('AADDCDDDB'), list('CCDBDCDDDABDAC'), list('DDADB'), list('BCDACDBBBBAC'),
+    		 list('ADCBCCCB'), list('BDDBCA'), list('ACCDCBADCBBBB'), list('ACCBABCBA'),
+    		 list('DAABCCBBD'), list('CCAADACBDCDBABC'), list('BCDCACBBAD'), list('CBBBCDCA'),
+    		 list('CBDADACAADBCC'), list('DCDABCC'), list('AAAABCBC'), list('BDDDDACBBCABA'),
+    		 list('CCBDCBDCBADBBDB'), list('CBDAA'), list('CCDAAAB'), list('ABDBAC'),
+    		 list('DBCCDCC'), list('CCDCBB'), list('ACDBCDD'), list('CACCACCDBBDABB'),
+    		 list('DCDDAB'), list('ADDDCDACACA'), list('AAACACADB'), list('BDBBDDBACDCCAAA') ]
+
+    weights = [ 8, 7, 1, 2, 8, 1, 9, 2, 6, 2, 5, 6, 10, 10, 8, 4, 10, 10, 2, 9,
+    			6, 2, 9, 9, 3, 1, 10, 6, 9, 1, 2, 9, 2, 1, 4, 1, 2, 7, 9, 4]
+
+    zeroth_dist = DiscreteDistribution( { 'A': 0.1, 'B': 0.2, 'C': 0.3, 'D': 0.4 } )
+
+    first_dist = ConditionalProbabilityTable(
+    	[[ 'A', 'A', 0.8 ], [ 'A', 'B', 0.05 ], [ 'A', 'C', 0.05 ], [ 'A', 'D', 0.1 ],
+    	 [ 'B', 'A', 0.1 ], [ 'B', 'B', 0.2 ], [ 'B', 'C', 0.6 ], [ 'B', 'D', 0.1 ],
+    	 [ 'C', 'A', 0.15 ], [ 'C', 'B', 0.1 ], [ 'C', 'C', 0.25 ], [ 'C', 'D', 0.5 ],
+    	 [ 'D', 'A', 0.25 ], [ 'D', 'B', 0.25 ], [ 'D', 'C', 0.4 ], [ 'D', 'D', 0.1 ]],
+    	 [ zeroth_dist ] )
+
+    second_dist = ConditionalProbabilityTable(
+    	[[ 'A', 'A', 'A', 0.05 ], [ 'A', 'A', 'B', 0.25 ], [ 'A', 'A', 'C', 0.15 ], [ 'A', 'A', 'D', 0.55 ],
+    	 [ 'A', 'B', 'A', 0.05 ], [ 'A', 'B', 'B', 0.05 ], [ 'A', 'B', 'C', 0.85 ], [ 'A', 'B', 'D', 0.05 ],
+    	 [ 'A', 'C', 'A', 0.7 ], [ 'A', 'C', 'B', 0.1 ], [ 'A', 'C', 'C', 0.1 ], [ 'A', 'C', 'D', 0.1 ],
+    	 [ 'A', 'D', 'A', 0.2 ], [ 'A', 'D', 'B', 0.4 ], [ 'A', 'D', 'C', 0.35 ], [ 'A', 'D', 'D', 0.05 ],
+    	 [ 'B', 'A', 'A', 0.3 ], [ 'B', 'A', 'B', 0.05 ], [ 'B', 'A', 'C', 0.15 ], [ 'B', 'A', 'D', 0.5 ],
+    	 [ 'B', 'B', 'A', 0.8 ], [ 'B', 'B', 'B', 0.1 ], [ 'B', 'B', 'C', 0.1 ], [ 'B', 'B', 'D', 0.0 ],
+    	 [ 'B', 'C', 'A', 0.1 ], [ 'B', 'C', 'B', 0.35 ], [ 'B', 'C', 'C', 0.3 ], [ 'B', 'C', 'D', 0.25 ],
+    	 [ 'B', 'D', 'A', 0.3 ], [ 'B', 'D', 'B', 0.1 ], [ 'B', 'D', 'C', 0.2 ], [ 'B', 'D', 'D', 0.4 ],
+    	 [ 'C', 'A', 'A', 0.2 ], [ 'C', 'A', 'B', 0.3 ], [ 'C', 'A', 'C', 0.3 ], [ 'C', 'A', 'D', 0.2 ],
+    	 [ 'C', 'B', 'A', 0.35 ], [ 'C', 'B', 'B', 0.45 ], [ 'C', 'B', 'C', 0.0 ], [ 'C', 'B', 'D', 0.2 ],
+    	 [ 'C', 'C', 'A', 0.25 ], [ 'C', 'C', 'B', 0.0 ], [ 'C', 'C', 'C', 0.6 ], [ 'C', 'C', 'D', 0.15 ],
+    	 [ 'C', 'D', 'A', 0.8 ], [ 'C', 'D', 'B', 0.1 ], [ 'C', 'D', 'C', 0.05 ], [ 'C', 'D', 'D', 0.05 ],
+    	 [ 'D', 'A', 'A', 0.5 ], [ 'D', 'A', 'B', 0.0 ], [ 'D', 'A', 'C', 0.5 ], [ 'D', 'A', 'D', 0.0 ],
+    	 [ 'D', 'B', 'A', 0.35 ], [ 'D', 'B', 'B', 0.5 ], [ 'D', 'B', 'C', 0.1 ], [ 'D', 'B', 'D', 0.05 ],
+    	 [ 'D', 'C', 'A', 0.1 ], [ 'D', 'C', 'B', 0.45 ], [ 'D', 'C', 'C', 0.0 ], [ 'D', 'C', 'D', 0.45 ],
+    	 [ 'D', 'D', 'A', 0.2 ], [ 'D', 'D', 'B', 0.1 ], [ 'D', 'D', 'C', 0.1 ], [ 'D', 'D', 'D', 0.6 ]],
+    	 [ zeroth_dist, first_dist ] )
+    return data, weights, zeroth_dist, first_dist, second_dist
+
+
+def test_zeroth_dist(setup):
+    data, weights, zeroth_dist, first_dist, second_dist = setup
+    assert_almost_equal( zeroth_dist.log_probability( 'A' ), -2.3025850929940455 )
+    assert_almost_equal( zeroth_dist.log_probability( 'B' ), -1.6094379124341003 )
+    assert_almost_equal( zeroth_dist.log_probability( 'C' ), -1.2039728043259361 )
+    assert_almost_equal( zeroth_dist.log_probability( 'D' ), -0.916290731874155 )
+    assert zeroth_dist.log_probability( 'E' ) == -float('inf')
+
+
+def test_first_dist(setup):
+    data, weights, zeroth_dist, first_dist, second_dist = setup
+    assert_almost_equal( first_dist.log_probability( ('A', 'A') ), -0.22314355131420971 )
+    assert_almost_equal( first_dist.log_probability( ('A', 'B') ), -2.9957322735539909 )
+
+    assert_almost_equal( first_dist.log_probability( ('B', 'B') ), -1.6094379124341003 )
+    assert_almost_equal( first_dist.log_probability( ('B', 'C') ), -0.51082562376599072 )
+
+    assert_almost_equal( first_dist.log_probability( ('C', 'C') ), -1.3862943611198906 )
+    assert_almost_equal( first_dist.log_probability( ('C', 'D') ), -0.69314718055994529 )
+
+    assert_almost_equal( first_dist.log_probability( ('D', 'D') ), -2.3025850929940455 )
+    assert_almost_equal( first_dist.log_probability( ('D', 'A') ), -1.3862943611198906 )
+
+
+def test_second_dist(setup):
+    data, weights, zeroth_dist, first_dist, second_dist = setup
+    assert_almost_equal( second_dist.log_probability( ('A', 'A', 'C') ), -1.89711998489 )
+    assert_almost_equal( second_dist.log_probability( ('A', 'A', 'A') ), -2.99573227355 )
+    assert_almost_equal( second_dist.log_probability( ('A', 'B', 'A') ), -2.99573227355 )
+    assert_almost_equal( second_dist.log_probability( ('A', 'B', 'C') ), -0.162518929498 )
+
+    assert_almost_equal( second_dist.log_probability( ('A', 'C', 'C') ), -2.30258509299 )
+    assert_almost_equal( second_dist.log_probability( ('A', 'C', 'D') ), -2.30258509299 )
+    assert_almost_equal( second_dist.log_probability( ('A', 'D', 'B') ), -0.916290731874 )
+    assert_almost_equal( second_dist.log_probability( ('A', 'D', 'D') ), -2.99573227355 )
+
+    assert_almost_equal( second_dist.log_probability( ('B', 'A', 'B') ), -2.99573227355 )
+    assert_almost_equal( second_dist.log_probability( ('B', 'A', 'D') ), -0.69314718056 )
+    assert_almost_equal( second_dist.log_probability( ('B', 'B', 'B') ), -2.30258509299 )
+    assert second_dist.log_probability( ('B', 'B', 'D') ) == -float('inf')
+
+    assert_almost_equal( second_dist.log_probability( ('B', 'C', 'A') ), -2.30258509299 )
+    assert_almost_equal( second_dist.log_probability( ('B', 'C', 'B') ), -1.0498221245 )
+    assert_almost_equal( second_dist.log_probability( ('B', 'D', 'D') ), -0.916290731874 )
+    assert_almost_equal( second_dist.log_probability( ('B', 'D', 'B') ), -2.30258509299 )
+
+    assert_almost_equal( second_dist.log_probability( ('C', 'A', 'A') ), -1.60943791243 )
+    assert_almost_equal( second_dist.log_probability( ('C', 'A', 'B') ), -1.20397280433 )
+    assert second_dist.log_probability( ('C', 'B', 'C') ) == -float('inf')
+    assert_almost_equal( second_dist.log_probability( ('C', 'B', 'A') ), -1.0498221245 )
+
+    assert_almost_equal( second_dist.log_probability( ('C', 'C', 'D') ), -1.89711998489 )
+    assert second_dist.log_probability( ('C', 'C', 'B') ) == -float('inf')
+    assert_almost_equal( second_dist.log_probability( ('C', 'D', 'A') ), -0.223143551314 )
+    assert_almost_equal( second_dist.log_probability( ('C', 'D', 'C') ), -2.99573227355 )
+
+    assert second_dist.log_probability( ('D', 'A', 'D') ) == -float('inf')
+    assert_almost_equal( second_dist.log_probability( ('D', 'A', 'A') ), -0.69314718056 )
+    assert_almost_equal( second_dist.log_probability( ('D', 'B', 'D') ), -2.99573227355 )
+    assert_almost_equal( second_dist.log_probability( ('D', 'B', 'C') ), -2.30258509299 )
+
+    assert_almost_equal( second_dist.log_probability( ('D', 'C', 'A') ), -2.30258509299 )
+    assert_almost_equal( second_dist.log_probability( ('D', 'C', 'D') ), -0.798507696218 )
+    assert_almost_equal( second_dist.log_probability( ('D', 'D', 'D') ), -0.510825623766 )
+    assert_almost_equal( second_dist.log_probability( ('D', 'D', 'A') ), -1.60943791243 )
+
+
+def test_constructors(setup):
+    data, weights, zeroth_dist, first_dist, second_dist = setup
+    # raises no errors
+    first_chain = MarkovChain([ zeroth_dist, first_dist ])
+    second_chain = MarkovChain([ zeroth_dist, first_dist, second_dist ])
+
+
+def test_first_log_probability(setup):
+    data, weights, zeroth_dist, first_dist, second_dist = setup
+    # test going one state back
+    first_chain = MarkovChain([ zeroth_dist, first_dist ])
+
+    assert_almost_equal( first_chain.log_probability( list('A') ), -2.3025850929940455 )
+    assert_almost_equal( first_chain.log_probability( list('B') ), -1.6094379124341003 )
+
+    assert_almost_equal( first_chain.log_probability( list('AC') ), -5.2983173665480363 )
+    assert_almost_equal( first_chain.log_probability( list('AD') ), -4.6051701859880909 )
+    assert_almost_equal( first_chain.log_probability( list('BD') ), -3.9120230054281455 )
+    assert_almost_equal( first_chain.log_probability( list('BA') ), -3.9120230054281455 )
+    assert_almost_equal( first_chain.log_probability( list('CA') ), -3.1010927892118172 )
+    assert_almost_equal( first_chain.log_probability( list('CB') ), -3.5065578973199818 )
+    assert_almost_equal( first_chain.log_probability( list('DB') ), -2.3025850929940455 )
+    assert_almost_equal( first_chain.log_probability( list('DC') ), -1.83258146374831 )
+
+    assert_almost_equal( first_chain.log_probability( list('ABDD') ), -9.9034875525361272 )
+    assert_almost_equal( first_chain.log_probability( list('CCCB') ), -6.2791466195597625 )
+    assert_almost_equal( first_chain.log_probability( list('CCBD') ), -7.1954373514339167 )
+    assert_almost_equal( first_chain.log_probability( list('ACAC') ), -10.191169624987909 )
+
+    assert_almost_equal( first_chain.log_probability( list('ABDBCCDC') ), -12.493754717981954 )
+    assert_almost_equal( first_chain.log_probability( list('DACCBDCB') ), -14.508657738524217 )
+
+    assert_almost_equal( first_chain.log_probability( list('BCCCACBDBDBABACD') ), -30.75583043175768 )
+
+    assert_almost_equal( first_chain.log_probability( list('DABBCBDACAAADCBDCDBCBDCACBDABBAA') ), -53.616465272807112 )
+
+    # test going two states back
+    second_chain = MarkovChain([ zeroth_dist, first_dist, second_dist ])
+
+    assert_almost_equal( second_chain.log_probability( list('A') ), -2.30258509299 )
+    assert_almost_equal( second_chain.log_probability( list('B') ), -1.60943791243 )
+
+    assert_almost_equal( second_chain.log_probability( list('AC') ), -5.29831736655 )
+    assert_almost_equal( second_chain.log_probability( list('AD') ), -4.60517018599 )
+    assert_almost_equal( second_chain.log_probability( list('BD') ), -3.91202300543 )
+    assert_almost_equal( second_chain.log_probability( list('BA') ), -3.91202300543 )
+    assert_almost_equal( second_chain.log_probability( list('CA') ), -3.10109278921 )
+    assert_almost_equal( second_chain.log_probability( list('CB') ), -3.50655789732 )
+    assert_almost_equal( second_chain.log_probability( list('DB') ), -2.30258509299 )
+    assert_almost_equal( second_chain.log_probability( list('DC') ), -1.83258146375 )
+
+    assert_almost_equal( second_chain.log_probability( list('ABDD') ), -9.21034037198 )
+    assert_almost_equal( second_chain.log_probability( list('CCCD') ), -4.9982127741 )
+    assert second_chain.log_probability( list('CCBD') ) == -float('inf')
+    assert_almost_equal( second_chain.log_probability( list('ACAC') ), -6.85896511481 )
 
-	assert_almost_equal( second_chain.log_probability( list('ABDBCCDC') ), -18.9960448889 )
-	assert_almost_equal( second_chain.log_probability( list('DACCBDCB') ), -float('inf') )
+    assert_almost_equal( second_chain.log_probability( list('ABDBCCDC') ), -18.9960448889 )
+    assert second_chain.log_probability( list('DACCBDCB') ) == -float('inf')
 
-	assert_almost_equal( second_chain.log_probability( list('BCCCACBDBDBABACD') ), -29.1792463442 )
+    assert_almost_equal( second_chain.log_probability( list('BCCCACBDBDBABACD') ), -29.1792463442 )
 
-	assert_almost_equal( second_chain.log_probability( list('DABBCBDACAAADCBDCDBCBDCACBDABBAA') ), -float('inf') )
+    assert second_chain.log_probability( list('DABBCBDACAAADCBDCDBCBDCACBDABBAA') ) == -float('inf')
 
 # if summarize and from summaries work, so does fit
 
-@with_setup( setup, teardown )
-def test_summarize_no_weights_no_inertia():
-	first_chain = MarkovChain([ zeroth_dist, first_dist ])
-
-	# split in four
-	first_chain.summarize( data[:10] )
-	first_chain.summarize( data[10:20] )
-	first_chain.summarize( data[20:30] )
-	first_chain.summarize( data[30:] )
-	first_chain.from_summaries()
-
-	# check if probabilities are correct
-	assert_almost_equal( first_chain.log_probability( list('A') ), -1.29098418132 )
-	assert_almost_equal( first_chain.log_probability( list('B') ), -1.89711998489 )
-
-	assert_almost_equal( first_chain.log_probability( list('AC') ), -2.52493781785 )
-	assert_almost_equal( first_chain.log_probability( list('AD') ), -2.82721868973 )
-	assert_almost_equal( first_chain.log_probability( list('BD') ), -3.35240721749 )
-	assert_almost_equal( first_chain.log_probability( list('BA') ), -3.56371631116 )
-	assert_almost_equal( first_chain.log_probability( list('CA') ), -2.66812748722 )
-	assert_almost_equal( first_chain.log_probability( list('CB') ), -2.31672960038 )
-	assert_almost_equal( first_chain.log_probability( list('DB') ), -2.74959920402 )
-	assert_almost_equal( first_chain.log_probability( list('DC') ), -2.70514744144 )
-
-	assert_almost_equal( first_chain.log_probability( list('ABDD') ), -5.69233078086 )
-	assert_almost_equal( first_chain.log_probability( list('CCCB') ), -5.2049574644 )
-	assert_almost_equal( first_chain.log_probability( list('CCBD') ), -5.216130765 )
-	assert_almost_equal( first_chain.log_probability( list('ACAC') ), -5.30308884496 )
-
-	assert_almost_equal( first_chain.log_probability( list('ABDBCCDC') ), -11.1281275339 )
-	assert_almost_equal( first_chain.log_probability( list('DACCBDCB') ), -10.6827162728 )
-
-	assert_almost_equal( first_chain.log_probability( list('BCCCACBDBDBABACD') ), -23.2162130846 )
-
-	assert_almost_equal( first_chain.log_probability( list('DABBCBDACAAADCBDCDBCBDCACBDABBAA') ), -43.7174844781 )
-
-	second_chain = MarkovChain([ zeroth_dist, first_dist, second_dist ])
-
-	# split into four
-	second_chain.summarize( data[:10] )
-	second_chain.summarize( data[10:20] )
-	second_chain.summarize( data[20:30] )
-	second_chain.summarize( data[30:] )
-	second_chain.from_summaries()
-
-@with_setup( setup, teardown )
-def test_summarize_no_weights_with_inertia():
-	first_chain = MarkovChain([ zeroth_dist, first_dist ])
-
-	first_chain.summarize( data[:10] )
-	first_chain.summarize( data[10:20] )
-	first_chain.summarize( data[20:30] )
-	first_chain.summarize( data[30:] )
-	first_chain.from_summaries( inertia=0.4 )
-
-	assert_almost_equal( first_chain.log_probability( list('A') ), -1.58474529984 )
-	assert_almost_equal( first_chain.log_probability( list('B') ), -1.77195684193 )
-
-	assert_almost_equal( first_chain.log_probability( list('AC') ), -3.22112518823 )
-	assert_almost_equal( first_chain.log_probability( list('AD') ), -3.3619279842 )
-	assert_almost_equal( first_chain.log_probability( list('BD') ), -3.48675527002 )
-	assert_almost_equal( first_chain.log_probability( list('BA') ), -3.6470979201 )
-	assert_almost_equal( first_chain.log_probability( list('CA') ), -2.82601794483 )
-	assert_almost_equal( first_chain.log_probability( list('CB') ), -2.66015931757 )
-	assert_almost_equal( first_chain.log_probability( list('DB') ), -2.54362030796 )
-	assert_almost_equal( first_chain.log_probability( list('DC') ), -2.30916483161 )
-
-	assert_almost_equal( first_chain.log_probability( list('ABDD') ), -6.88185009599 )
-	assert_almost_equal( first_chain.log_probability( list('CCCB') ), -5.50132618677 )
-	assert_almost_equal( first_chain.log_probability( list('CCBD') ), -5.79554118026 )
-	assert_almost_equal( first_chain.log_probability( list('ACAC') ), -6.52834038129 )
-
-	assert_almost_equal( first_chain.log_probability( list('ABDBCCDC') ), -11.1045309105 )
-	assert_almost_equal( first_chain.log_probability( list('DACCBDCB') ), -11.519936158 )
-
-	assert_almost_equal( first_chain.log_probability( list('BCCCACBDBDBABACD') ), -24.8604337068 )
-
-	assert_almost_equal( first_chain.log_probability( list('DABBCBDACAAADCBDCDBCBDCACBDABBAA') ), -44.8853484278 )
-
-@with_setup( setup, teardown )
-def test_summarize_with_weights_no_inertia():
-	first_chain = MarkovChain([ zeroth_dist, first_dist ])
-
-	# split in four
-	first_chain.summarize( data[:10], weights=weights[:10] )
-	first_chain.summarize( data[10:20], weights=weights[10:20] )
-	first_chain.summarize( data[20:30], weights=weights[20:30] )
-	first_chain.summarize( data[30:], weights=weights[30:] )
-	first_chain.from_summaries()
-
-	assert_almost_equal( first_chain.log_probability( list('A') ), -0.961056745744 )
-	assert_almost_equal( first_chain.log_probability( list('B') ), -1.82454929205 )
-
-	assert_almost_equal( first_chain.log_probability( list('AC') ), -2.13478966488 )
-	assert_almost_equal( first_chain.log_probability( list('AD') ), -2.47837936927 )
-	assert_almost_equal( first_chain.log_probability( list('BD') ), -3.41892100613 )
-	assert_almost_equal( first_chain.log_probability( list('BA') ), -3.45702085236 )
-	assert_almost_equal( first_chain.log_probability( list('CA') ), -2.84952832968 )
-	assert_almost_equal( first_chain.log_probability( list('CB') ), -2.43790935599 )
-	assert_almost_equal( first_chain.log_probability( list('DB') ), -2.88910833385 )
-	assert_almost_equal( first_chain.log_probability( list('DC') ), -2.98200208074 )
-
-	assert_almost_equal( first_chain.log_probability( list('ABDD') ), -5.56874179664 )
-	assert_almost_equal( first_chain.log_probability( list('CCCB') ), -5.71252297888 )
-	assert_almost_equal( first_chain.log_probability( list('CCBD') ), -5.66958788152 )
-	assert_almost_equal( first_chain.log_probability( list('ACAC') ), -4.78548674538 )
-
-	assert_almost_equal( first_chain.log_probability( list('ABDBCCDC') ), -11.3892431338 )
-	assert_almost_equal( first_chain.log_probability( list('DACCBDCB') ), -11.0991865874 )
-
-	assert_almost_equal( first_chain.log_probability( list('BCCCACBDBDBABACD') ), -23.5462387667 )
-
-	assert_almost_equal( first_chain.log_probability( list('DABBCBDACAAADCBDCDBCBDCACBDABBAA') ), -44.3127858762 )
-
-@with_setup( setup, teardown )
-def test_summarize_with_weights_with_inertia():
-	first_chain = MarkovChain([ zeroth_dist, first_dist ])
-
-	# split in four
-	first_chain.summarize( data[:10], weights=weights[:10] )
-	first_chain.summarize( data[10:20], weights=weights[10:20] )
-	first_chain.summarize( data[20:30], weights=weights[20:30] )
-	first_chain.summarize( data[30:], weights=weights[30:] )
-	first_chain.from_summaries( inertia=0.4 )
-
-	assert_almost_equal( first_chain.log_probability( list('A') ), -1.3112125381 )
-	assert_almost_equal( first_chain.log_probability( list('B') ), -1.73288210353 )
-
-	assert_almost_equal( first_chain.log_probability( list('AC') ), -2.89339373397 )
-	assert_almost_equal( first_chain.log_probability( list('AD') ), -3.07392432189 )
-	assert_almost_equal( first_chain.log_probability( list('BD') ), -3.55414269165 )
-	assert_almost_equal( first_chain.log_probability( list('BA') ), -3.58268887356 )
-	assert_almost_equal( first_chain.log_probability( list('CA') ), -2.92624445535 )
-	assert_almost_equal( first_chain.log_probability( list('CB') ), -2.70099965753 )
-	assert_almost_equal( first_chain.log_probability( list('DB') ), -2.596587547 )
-	assert_almost_equal( first_chain.log_probability( list('DC') ), -2.43824119019 )
-
-	assert_almost_equal( first_chain.log_probability( list('ABDD') ), -6.77853581842 )
-	assert_almost_equal( first_chain.log_probability( list('CCCB') ), -5.75946483735 )
-	assert_almost_equal( first_chain.log_probability( list('CCBD') ), -6.05149283556 )
-	assert_almost_equal( first_chain.log_probability( list('ACAC') ), -6.10013721195 )
-
-	assert_almost_equal( first_chain.log_probability( list('ABDBCCDC') ), -11.2181867683 )
-	assert_almost_equal( first_chain.log_probability( list('DACCBDCB') ), -11.6681121956 )
-
-	assert_almost_equal( first_chain.log_probability( list('BCCCACBDBDBABACD') ), -25.0365515667 )
-
-	assert_almost_equal( first_chain.log_probability( list('DABBCBDACAAADCBDCDBCBDCACBDABBAA') ), -45.1660985662 )
-
-@with_setup( setup, teardown )
-def test_raise_errors():
-	pass
-
-@with_setup( setup, teardown )
-def test_pickling():
-	chain1 = MarkovChain([ zeroth_dist, first_dist ])
-	chain2 = pickle.loads( pickle.dumps( chain1 ) )
-
-	assert_almost_equal( chain1.log_probability( list('BCCCACBDBDBABACD') ),
-	                     chain2.log_probability( list('BCCCACBDBDBABACD') ) )
-
-@with_setup( setup, teardown )
-def test_json():
-	chain1 = MarkovChain([ zeroth_dist, first_dist ])
-	chain2 = MarkovChain.from_json(chain1.to_json())
-
-	assert_almost_equal( chain1.log_probability( list('BCCCACBDBDBABACD') ),
-	                     chain2.log_probability( list('BCCCACBDBDBABACD') ) )
-
-@with_setup( setup, teardown )
-def test_robust_from_json():
-	chain1 = MarkovChain([ zeroth_dist, first_dist ])
-	chain2 = from_json(chain1.to_json())
 
-	assert_almost_equal( chain1.log_probability( list('BCCCACBDBDBABACD') ),
-	                     chain2.log_probability( list('BCCCACBDBDBABACD') ) )
\ No newline at end of file
+def test_summarize_no_weights_no_inertia(setup):
+    data, weights, zeroth_dist, first_dist, second_dist = setup
+    first_chain = MarkovChain([ zeroth_dist, first_dist ])
+
+    # split in four
+    first_chain.summarize( data[:10] )
+    first_chain.summarize( data[10:20] )
+    first_chain.summarize( data[20:30] )
+    first_chain.summarize( data[30:] )
+    first_chain.from_summaries()
+
+    # check if probabilities are correct
+    assert_almost_equal( first_chain.log_probability( list('A') ), -1.29098418132 )
+    assert_almost_equal( first_chain.log_probability( list('B') ), -1.89711998489 )
+
+    assert_almost_equal( first_chain.log_probability( list('AC') ), -2.52493781785 )
+    assert_almost_equal( first_chain.log_probability( list('AD') ), -2.82721868973 )
+    assert_almost_equal( first_chain.log_probability( list('BD') ), -3.35240721749 )
+    assert_almost_equal( first_chain.log_probability( list('BA') ), -3.56371631116 )
+    assert_almost_equal( first_chain.log_probability( list('CA') ), -2.66812748722 )
+    assert_almost_equal( first_chain.log_probability( list('CB') ), -2.31672960038 )
+    assert_almost_equal( first_chain.log_probability( list('DB') ), -2.74959920402 )
+    assert_almost_equal( first_chain.log_probability( list('DC') ), -2.70514744144 )
+
+    assert_almost_equal( first_chain.log_probability( list('ABDD') ), -5.69233078086 )
+    assert_almost_equal( first_chain.log_probability( list('CCCB') ), -5.2049574644 )
+    assert_almost_equal( first_chain.log_probability( list('CCBD') ), -5.216130765 )
+    assert_almost_equal( first_chain.log_probability( list('ACAC') ), -5.30308884496 )
+
+    assert_almost_equal( first_chain.log_probability( list('ABDBCCDC') ), -11.1281275339 )
+    assert_almost_equal( first_chain.log_probability( list('DACCBDCB') ), -10.6827162728 )
+
+    assert_almost_equal( first_chain.log_probability( list('BCCCACBDBDBABACD') ), -23.2162130846 )
+
+    assert_almost_equal( first_chain.log_probability( list('DABBCBDACAAADCBDCDBCBDCACBDABBAA') ), -43.7174844781 )
+
+    second_chain = MarkovChain([ zeroth_dist, first_dist, second_dist ])
+
+    # split into four
+    second_chain.summarize( data[:10] )
+    second_chain.summarize( data[10:20] )
+    second_chain.summarize( data[20:30] )
+    second_chain.summarize( data[30:] )
+    second_chain.from_summaries()
+
+
+def test_summarize_no_weights_with_inertia(setup):
+    data, weights, zeroth_dist, first_dist, second_dist = setup
+    first_chain = MarkovChain([ zeroth_dist, first_dist ])
+
+    first_chain.summarize( data[:10] )
+    first_chain.summarize( data[10:20] )
+    first_chain.summarize( data[20:30] )
+    first_chain.summarize( data[30:] )
+    first_chain.from_summaries( inertia=0.4 )
+
+    assert_almost_equal( first_chain.log_probability( list('A') ), -1.58474529984 )
+    assert_almost_equal( first_chain.log_probability( list('B') ), -1.77195684193 )
+
+    assert_almost_equal( first_chain.log_probability( list('AC') ), -3.22112518823 )
+    assert_almost_equal( first_chain.log_probability( list('AD') ), -3.3619279842 )
+    assert_almost_equal( first_chain.log_probability( list('BD') ), -3.48675527002 )
+    assert_almost_equal( first_chain.log_probability( list('BA') ), -3.6470979201 )
+    assert_almost_equal( first_chain.log_probability( list('CA') ), -2.82601794483 )
+    assert_almost_equal( first_chain.log_probability( list('CB') ), -2.66015931757 )
+    assert_almost_equal( first_chain.log_probability( list('DB') ), -2.54362030796 )
+    assert_almost_equal( first_chain.log_probability( list('DC') ), -2.30916483161 )
+
+    assert_almost_equal( first_chain.log_probability( list('ABDD') ), -6.88185009599 )
+    assert_almost_equal( first_chain.log_probability( list('CCCB') ), -5.50132618677 )
+    assert_almost_equal( first_chain.log_probability( list('CCBD') ), -5.79554118026 )
+    assert_almost_equal( first_chain.log_probability( list('ACAC') ), -6.52834038129 )
+
+    assert_almost_equal( first_chain.log_probability( list('ABDBCCDC') ), -11.1045309105 )
+    assert_almost_equal( first_chain.log_probability( list('DACCBDCB') ), -11.519936158 )
+
+    assert_almost_equal( first_chain.log_probability( list('BCCCACBDBDBABACD') ), -24.8604337068 )
+
+    assert_almost_equal( first_chain.log_probability( list('DABBCBDACAAADCBDCDBCBDCACBDABBAA') ), -44.8853484278 )
+
+
+def test_summarize_with_weights_no_inertia(setup):
+    data, weights, zeroth_dist, first_dist, second_dist = setup
+    first_chain = MarkovChain([ zeroth_dist, first_dist ])
+
+    # split in four
+    first_chain.summarize( data[:10], weights=weights[:10] )
+    first_chain.summarize( data[10:20], weights=weights[10:20] )
+    first_chain.summarize( data[20:30], weights=weights[20:30] )
+    first_chain.summarize( data[30:], weights=weights[30:] )
+    first_chain.from_summaries()
+
+    assert_almost_equal( first_chain.log_probability( list('A') ), -0.961056745744 )
+    assert_almost_equal( first_chain.log_probability( list('B') ), -1.82454929205 )
+
+    assert_almost_equal( first_chain.log_probability( list('AC') ), -2.13478966488 )
+    assert_almost_equal( first_chain.log_probability( list('AD') ), -2.47837936927 )
+    assert_almost_equal( first_chain.log_probability( list('BD') ), -3.41892100613 )
+    assert_almost_equal( first_chain.log_probability( list('BA') ), -3.45702085236 )
+    assert_almost_equal( first_chain.log_probability( list('CA') ), -2.84952832968 )
+    assert_almost_equal( first_chain.log_probability( list('CB') ), -2.43790935599 )
+    assert_almost_equal( first_chain.log_probability( list('DB') ), -2.88910833385 )
+    assert_almost_equal( first_chain.log_probability( list('DC') ), -2.98200208074 )
+
+    assert_almost_equal( first_chain.log_probability( list('ABDD') ), -5.56874179664 )
+    assert_almost_equal( first_chain.log_probability( list('CCCB') ), -5.71252297888 )
+    assert_almost_equal( first_chain.log_probability( list('CCBD') ), -5.66958788152 )
+    assert_almost_equal( first_chain.log_probability( list('ACAC') ), -4.78548674538 )
+
+    assert_almost_equal( first_chain.log_probability( list('ABDBCCDC') ), -11.3892431338 )
+    assert_almost_equal( first_chain.log_probability( list('DACCBDCB') ), -11.0991865874 )
+
+    assert_almost_equal( first_chain.log_probability( list('BCCCACBDBDBABACD') ), -23.5462387667 )
+
+    assert_almost_equal( first_chain.log_probability( list('DABBCBDACAAADCBDCDBCBDCACBDABBAA') ), -44.3127858762 )
+
+
+def test_summarize_with_weights_with_inertia(setup):
+    data, weights, zeroth_dist, first_dist, second_dist = setup
+    first_chain = MarkovChain([ zeroth_dist, first_dist ])
+
+    # split in four
+    first_chain.summarize( data[:10], weights=weights[:10] )
+    first_chain.summarize( data[10:20], weights=weights[10:20] )
+    first_chain.summarize( data[20:30], weights=weights[20:30] )
+    first_chain.summarize( data[30:], weights=weights[30:] )
+    first_chain.from_summaries( inertia=0.4 )
+
+    assert_almost_equal( first_chain.log_probability( list('A') ), -1.3112125381 )
+    assert_almost_equal( first_chain.log_probability( list('B') ), -1.73288210353 )
+
+    assert_almost_equal( first_chain.log_probability( list('AC') ), -2.89339373397 )
+    assert_almost_equal( first_chain.log_probability( list('AD') ), -3.07392432189 )
+    assert_almost_equal( first_chain.log_probability( list('BD') ), -3.55414269165 )
+    assert_almost_equal( first_chain.log_probability( list('BA') ), -3.58268887356 )
+    assert_almost_equal( first_chain.log_probability( list('CA') ), -2.92624445535 )
+    assert_almost_equal( first_chain.log_probability( list('CB') ), -2.70099965753 )
+    assert_almost_equal( first_chain.log_probability( list('DB') ), -2.596587547 )
+    assert_almost_equal( first_chain.log_probability( list('DC') ), -2.43824119019 )
+
+    assert_almost_equal( first_chain.log_probability( list('ABDD') ), -6.77853581842 )
+    assert_almost_equal( first_chain.log_probability( list('CCCB') ), -5.75946483735 )
+    assert_almost_equal( first_chain.log_probability( list('CCBD') ), -6.05149283556 )
+    assert_almost_equal( first_chain.log_probability( list('ACAC') ), -6.10013721195 )
+
+    assert_almost_equal( first_chain.log_probability( list('ABDBCCDC') ), -11.2181867683 )
+    assert_almost_equal( first_chain.log_probability( list('DACCBDCB') ), -11.6681121956 )
+
+    assert_almost_equal( first_chain.log_probability( list('BCCCACBDBDBABACD') ), -25.0365515667 )
+
+    assert_almost_equal( first_chain.log_probability( list('DABBCBDACAAADCBDCDBCBDCACBDABBAA') ), -45.1660985662 )
+
+
+def test_raise_errors(setup):
+    data, weights, zeroth_dist, first_dist, second_dist = setup
+    pass
+
+
+def test_pickling(setup):
+    data, weights, zeroth_dist, first_dist, second_dist = setup
+    chain1 = MarkovChain([ zeroth_dist, first_dist ])
+    chain2 = pickle.loads( pickle.dumps( chain1 ) )
+
+    assert_almost_equal( chain1.log_probability( list('BCCCACBDBDBABACD') ),
+                         chain2.log_probability( list('BCCCACBDBDBABACD') ) )
+
+
+def test_json(setup):
+    data, weights, zeroth_dist, first_dist, second_dist = setup
+    chain1 = MarkovChain([ zeroth_dist, first_dist ])
+    chain2 = MarkovChain.from_json(chain1.to_json())
+
+    assert_almost_equal( chain1.log_probability( list('BCCCACBDBDBABACD') ),
+                         chain2.log_probability( list('BCCCACBDBDBABACD') ) )
+
+
+def test_robust_from_json(setup):
+    data, weights, zeroth_dist, first_dist, second_dist = setup
+    chain1 = MarkovChain([ zeroth_dist, first_dist ])
+    chain2 = from_json(chain1.to_json())
+
+    assert_almost_equal( chain1.log_probability( list('BCCCACBDBDBABACD') ),
+                         chain2.log_probability( list('BCCCACBDBDBABACD') ) )
