diff --git a/ArrayTests.hpp b/ArrayTests.hpp index c5cbcb18..36d40d79 100644 --- a/ArrayTests.hpp +++ b/ArrayTests.hpp @@ -15,38 +15,25 @@ class ArrayOpsTest : public Test { testStart("quantize_v2"); //reference inputs /Users/neitan01/Documents/mbed/uTensor.git/TESTS/scripts/PRE-GEN/qA - TENSOR b = ctx.add(t_import.float_import ("/fs/testData/qB/in/Cast_1_0.idx")); - TENSOR b_min = ctx.add(t_import.float_import("/fs/testData/qB/in/Min_1_0.idx")); - TENSOR b_max = ctx.add(t_import.float_import("/fs/testData/qB/in/Max_1_0.idx")); + S_TENSOR b_q_ref = ctx.add(t_import.float_import ("/fs/testData/qB/in/Cast_1_0.idx", "b_q_ref")); + S_TENSOR b_min_q_ref = ctx.add(t_import.float_import("/fs/testData/qB/in/Min_1_0.idx", "b_min_q_ref")); + S_TENSOR b_max_q_ref = ctx.add(t_import.float_import("/fs/testData/qB/in/Max_1_0.idx", "b_max_q_ref")); //reference outputs - TENSOR b_q_ref = ctx.add(t_import.ubyte_import("/fs/testData/qB/out/qB_0.idx")); - TENSOR b_min_q_ref = ctx.add(t_import.float_import("/fs/testData/qB/out/qB_1.idx")); - TENSOR b_max_q_ref = ctx.add(t_import.float_import("/fs/testData/qB/out/qb_2.idx")); - - TENSOR b_q = ctx.add(new RamTensor(b_q_ref.lock()->getShape())); - TENSOR b_min_q = ctx.add(new RamTensor(b_min_q_ref.lock()->getShape())); - TENSOR b_max_q = ctx.add(new RamTensor(b_max_q_ref.lock()->getShape())); - - TList inputs = {b, b_min, b_max}; - TList outputs = {b_q, b_min_q, b_max_q}; - S_TENSOR out_b_q = b_q.lock(); - S_TENSOR out_b_min_q = b_min_q.lock(); - S_TENSOR out_b_max_q = b_max_q.lock(); - S_TENSOR ref_b_q = b_q_ref.lock(); - S_TENSOR ref_b_min_q = b_min_q_ref.lock(); - S_TENSOR ref_b_max_q = b_max_q_ref.lock(); + S_TENSOR ref_b_q = ctx.add(t_import.ubyte_import("/fs/testData/qB/out/qB_0.idx", "ref_b_q")); + S_TENSOR ref_b_min_q = ctx.add(t_import.float_import("/fs/testData/qB/out/qB_1.idx", "ref_b_min_q")); + S_TENSOR ref_b_max_q = ctx.add(t_import.float_import("/fs/testData/qB/out/qb_2.idx", "ref_b_max_q")); + + S_TENSOR out_b_q = 
ctx.add(new RamTensor(b_q_ref->getShape(), "b_q")); + S_TENSOR out_b_min_q = ctx.add(new RamTensor(b_min_q_ref->getShape(), "b_min_q")); + S_TENSOR out_b_max_q = ctx.add(new RamTensor(b_max_q_ref->getShape(), "b_max_q")); //Implementation goes here timer_start(); - ctx.push(new QuantizeV2Op(), inputs, outputs); + ctx.push(new QuantizeV2Op(), {"b_q_ref", "b_min_q_ref", "b_max_q_ref"}, {"b_q", "b_min_q", "b_max_q"}); ctx.eval(); timer_stop(); - // printf("refMin is : %f \r\n", *(b_min_q_ref.getPointer({0}))); - // printf("outMin is : %f \r\n", *(b_min_q.getPointer({0}))); - // printf("diff : output(%f), outMin(%f), outMax(%f)\r\n", - // meanPercentErr(b_q_ref, b_q), meanPercentErr(b_min_q_ref, b_min_q), meanPercentErr(b_max_q_ref, b_max_q)); double result = meanPercentErr(ref_b_q.get(), out_b_q.get()) + meanPercentErr(ref_b_min_q.get(), out_b_min_q.get()) + meanPercentErr(ref_b_max_q.get(), out_b_max_q.get()); //passed(result < 0.0001); @@ -57,27 +44,22 @@ class ArrayOpsTest : public Test { testStart("dequantize"); //reference inputs - TENSOR a = ctx.add(t_import.ubyte_import("/fs/testData/deQ/in/rQ_0.idx")); - TENSOR a_min = ctx.add(t_import.float_import("/fs/testData/deQ/in/rQ_1.idx")); - TENSOR a_max = ctx.add(t_import.float_import("/fs/testData/deQ/in/rQ_2.idx")); + S_TENSOR a = ctx.add(t_import.ubyte_import("/fs/testData/deQ/in/rQ_0.idx", "a")); + S_TENSOR a_min = ctx.add(t_import.float_import("/fs/testData/deQ/in/rQ_1.idx", "a_min")); + S_TENSOR a_max = ctx.add(t_import.float_import("/fs/testData/deQ/in/rQ_2.idx", "a_max")); //reference outputs - TENSOR out_ref = ctx.add(t_import.float_import("/fs/testData/deQ/out/deQ_0.idx")); + S_TENSOR out_ref = ctx.add(t_import.float_import("/fs/testData/deQ/out/deQ_0.idx", "out_ref")); //modify the checks below: - TENSOR out = ctx.add(new RamTensor(out_ref.lock()->getShape())); - TList inputs = {a, a_min, a_max}; - TList outputs = {out}; - - S_TENSOR out_val = out.lock(); - S_TENSOR ref_out = out_ref.lock(); + S_TENSOR out 
= ctx.add(new RamTensor(out_ref->getShape(), "out")); timer_start(); - ctx.push(new DequantizeOp(), inputs, outputs); + ctx.push(new DequantizeOp(), {"a", "a_min", "a_max"}, {"out"}); ctx.eval(); timer_stop(); - double result = meanPercentErr(out_val.get(), ref_out.get()); + double result = meanPercentErr(out.get(), out_ref.get()); //passed(result < 0.0001); passed(result == 0); } @@ -87,26 +69,22 @@ class ArrayOpsTest : public Test { TensorIdxImporter t_import; //reference inputs - TENSOR ref_a = ctx.add(t_import.float_import("/fs/testData/ref_reshape/in/Const_0.idx")); - TENSOR ref_dim = ctx.add(t_import.int_import("/fs/testData/ref_reshape/in/Const_1_0.idx")); + S_TENSOR ref_a = ctx.add(t_import.float_import("/fs/testData/ref_reshape/in/Const_0.idx", "ref_a")); + S_TENSOR ref_dim = ctx.add(t_import.int_import("/fs/testData/ref_reshape/in/Const_1_0.idx", "ref_dim")); //reference outputs - TENSOR out_ref = ctx.add(t_import.float_import("/fs/testData/ref_reshape/out/ref_reshape_0.idx")); + S_TENSOR out_ref_2 = ctx.add(t_import.float_import("/fs/testData/ref_reshape/out/ref_reshape_0.idx", "out_ref_2")); //modify the checks below: - TENSOR out = ctx.add(new RamTensor(out_ref.lock()->getShape())); - S_TENSOR out_val = out.lock(); - S_TENSOR ref_out = out_ref.lock(); + S_TENSOR out_2 = ctx.add(new RamTensor(out_ref_2->getShape(), "out_2")); - TList inputs = {ref_a, ref_dim}; - TList outputs = {out}; timer_start(); - ctx.push(new ReshapeOp(), inputs, outputs); + ctx.push(new ReshapeOp(), {"ref_a", "ref_dim"}, {"out_2"}); ctx.eval(); timer_stop(); - double result = meanPercentErr(out_val.get(), ref_out.get()); + double result = meanPercentErr(out_2.get(), out_ref_2.get()); //passed(result < 0.0001); passed(result == 0); } diff --git a/MathTests.hpp b/MathTests.hpp index f4dcf057..65714377 100644 --- a/MathTests.hpp +++ b/MathTests.hpp @@ -13,40 +13,35 @@ class MathOpsTest : public Test { void requantization_rangeTest(void) { testStart("requantization_range"); + 
ctx.gc(); + + //Note: raw pointers should be owned ONLY by the context. no copy of the raw pointer should exist elsewhere // reference inputs - TENSOR a = - ctx.add(t_import.int_import("/fs/testData/rqRange/in/qMatMul_0.idx")); - TENSOR a_min = - ctx.add(t_import.float_import("/fs/testData/rqRange/in/qMatMul_1.idx")); - TENSOR a_max = - ctx.add(t_import.float_import("/fs/testData/rqRange/in/qMatMul_2.idx")); + ctx.add(t_import.int_import("/fs/testData/rqRange/in/qMatMul_0.idx", "a")); + ctx.add(t_import.float_import("/fs/testData/rqRange/in/qMatMul_1.idx", "a_min")); + ctx.add(t_import.float_import("/fs/testData/rqRange/in/qMatMul_2.idx", "a_max")); - // reference outputs - TENSOR ref_min = - ctx.add(t_import.float_import("/fs/testData/rqRange/out/rqRange_0.idx")); - TENSOR ref_max = - ctx.add(t_import.float_import("/fs/testData/rqRange/out/rqRange_1.idx")); + // reference output + ctx.add(t_import.float_import("/fs/testData/rqRange/out/rqRange_0.idx", "ref_min")); + ctx.add(t_import.float_import("/fs/testData/rqRange/out/rqRange_1.idx", "ref_max")); // Implementation goes here // modify the checks below: - TENSOR out_min = ctx.add(new RamTensor(ref_min.lock()->getShape())); - TENSOR out_max = ctx.add(new RamTensor(ref_max.lock()->getShape())); - TList inputs = {a, a_min, a_max}; - TList outputs = {out_min, out_max}; - - S_TENSOR ref_min_val = ref_min.lock(); - S_TENSOR ref_max_val = ref_max.lock(); - S_TENSOR out_min_val = out_min.lock(); - S_TENSOR out_max_val = out_max.lock(); + ctx.add(new RamTensor(ctx.get("ref_min")->getShape(), "out_min")); + ctx.add(new RamTensor(ctx.get("ref_max")->getShape(), "out_max")); + TNameList inputs = {"a", "a_min", "a_max"}; + TNameList outputs = {"out_min", "out_max"}; timer_start(); ctx.push(new Requantization_RangeOp(), inputs, outputs); ctx.eval(); timer_stop(); + //Note: an output tensor will not be auto-deleted by context unless it has been used as an input double result = - meanPercentErr(ref_min_val.get(), 
out_min_val.get()) + meanPercentErr(ref_max_val.get(), out_max_val.get()); + meanPercentErr(ctx.get("ref_min").get(), ctx.get("out_min").get()) + + meanPercentErr(ctx.get("ref_max").get(), ctx.get("out_max").get()); // passed(result < 0.0001); passed(result == 0); } @@ -54,49 +49,42 @@ class MathOpsTest : public Test { void requantizeTest(void) { testStart("requantize"); + ctx.gc(); + // reference inputs - TENSOR a = ctx.add(t_import.int_import("/fs/testData/rQ/in/qMatMul_0.idx")); - TENSOR a_min = - ctx.add(t_import.float_import("/fs/testData/rQ/in/qMatMul_1.idx")); - TENSOR a_max = - ctx.add(t_import.float_import("/fs/testData/rQ/in/qMatMul_2.idx")); - TENSOR r_a_min = - ctx.add(t_import.float_import("/fs/testData/rQ/in/rqRange_0.idx")); - TENSOR r_a_max = - ctx.add(t_import.float_import("/fs/testData/rQ/in/rqRange_1.idx")); + ctx.add(t_import.int_import("/fs/testData/rQ/in/qMatMul_0.idx", "a")); + ctx.add(t_import.float_import("/fs/testData/rQ/in/qMatMul_1.idx", "a_min")); + ctx.add(t_import.float_import("/fs/testData/rQ/in/qMatMul_2.idx", "a_max")); + ctx.add(t_import.float_import("/fs/testData/rQ/in/rqRange_0.idx", "r_a_min")); + ctx.add(t_import.float_import("/fs/testData/rQ/in/rqRange_1.idx", "r_a_max")); // tf.quint8 + //Note: + //Instead of using ctx.get() to obtain a shared_ptr, you may also use the shared_ptr returned by ctx.add() + // reference outputs - TENSOR ref_a_q = - ctx.add(t_import.ubyte_import("/fs/testData/rQ/out/rQ_0.idx")); - TENSOR ref_a_min = - ctx.add(t_import.float_import("/fs/testData/rQ/out/rQ_1.idx")); - TENSOR ref_a_max = - ctx.add(t_import.float_import("/fs/testData/rQ/out/rQ_2.idx")); + S_TENSOR ref_a_q = ctx.add(t_import.ubyte_import("/fs/testData/rQ/out/rQ_0.idx", "ref_a_q")); + S_TENSOR ref_a_min = ctx.add(t_import.float_import("/fs/testData/rQ/out/rQ_1.idx", "ref_a_min")); + S_TENSOR ref_a_max = ctx.add(t_import.float_import("/fs/testData/rQ/out/rQ_2.idx", "ref_a_max")); // modify the checks below: - TENSOR a_q = ctx.add(new 
RamTensor(ref_a_q.lock()->getShape())); - TENSOR a_min_q = ctx.add(new RamTensor(ref_a_min.lock()->getShape())); - TENSOR a_max_q = ctx.add(new RamTensor(ref_a_max.lock()->getShape())); - - TList inputs = {a, a_min, a_max, r_a_min, r_a_max}; - TList outputs = {a_q, a_min_q, a_max_q}; - - S_TENSOR ref_a = ref_a_q.lock(); - S_TENSOR out_a = a_q.lock(); - S_TENSOR ref_min = ref_a_min.lock(); - S_TENSOR out_min = a_min_q.lock(); - S_TENSOR ref_max = ref_a_max.lock(); - S_TENSOR out_max = a_max_q.lock(); + S_TENSOR a_q = ctx.add(new RamTensor(ref_a_q->getShape(), "a_q")); + S_TENSOR a_min_q = ctx.add(new RamTensor(ref_a_min->getShape(), "a_min_q")); + S_TENSOR a_max_q = ctx.add(new RamTensor(ref_a_max->getShape(), "a_max_q")); + + + TNameList inputs = {"a", "a_min", "a_max", "r_a_min", "r_a_max"}; + TNameList outputs = {"a_q", "a_min_q", "a_max_q"}; + // Implementation goes here timer_start(); ctx.push(new RequantizeOp(), inputs, outputs); ctx.eval(); timer_stop(); - double result = meanPercentErr(ref_a.get(), out_a.get()) + - meanPercentErr(ref_min.get(), out_min.get()) + - meanPercentErr(ref_max.get(), out_max.get()); + double result = meanPercentErr(ref_a_q.get(), a_q.get()) + + meanPercentErr(ref_a_min.get(), a_min_q.get()) + + meanPercentErr(ref_a_max.get(), a_max_q.get()); // passed(result < 0.0001); passed(result == 0); } @@ -104,48 +92,42 @@ class MathOpsTest : public Test { void requantizeTest2(void) { testStart("requantize2"); + ctx.gc(); + // reference inputs - TENSOR a = ctx.add(t_import.int_import("/fs/testData/import-MatMul_eightbit_requantize/in/import-MatMul_eightbit_quantized_mat_mul_0.idx")); - TENSOR a_min = - ctx.add(t_import.float_import("/fs/testData/import-MatMul_eightbit_requantize/in/import-MatMul_eightbit_quantized_mat_mul_1.idx")); - TENSOR a_max = - ctx.add(t_import.float_import("/fs/testData/import-MatMul_eightbit_requantize/in/import-MatMul_eightbit_quantized_mat_mul_2.idx")); - TENSOR r_a_min = - 
ctx.add(t_import.float_import("/fs/testData/import-MatMul_eightbit_requantize/in/import-MatMul_eightbit_requant_range_0.idx")); - TENSOR r_a_max = - ctx.add(t_import.float_import("/fs/testData/import-MatMul_eightbit_requantize/in/import-MatMul_eightbit_requant_range_1.idx")); + ctx.add(t_import.int_import("/fs/testData/import-MatMul_eightbit_requantize/in/import-MatMul_eightbit_quantized_mat_mul_0.idx", "a")); + ctx.add(t_import.float_import("/fs/testData/import-MatMul_eightbit_requantize/in/import-MatMul_eightbit_quantized_mat_mul_1.idx", "a_min")); + ctx.add(t_import.float_import("/fs/testData/import-MatMul_eightbit_requantize/in/import-MatMul_eightbit_quantized_mat_mul_2.idx", "a_max")); + ctx.add(t_import.float_import("/fs/testData/import-MatMul_eightbit_requantize/in/import-MatMul_eightbit_requant_range_0.idx", "r_a_min")); + ctx.add(t_import.float_import("/fs/testData/import-MatMul_eightbit_requantize/in/import-MatMul_eightbit_requant_range_1.idx", "r_a_max")); // tf.quint8 // reference outputs - TENSOR ref_a_q = - ctx.add(t_import.ubyte_import("/fs/testData/import-MatMul_eightbit_requantize/out/import-MatMul_eightbit_requantize_0.idx")); - TENSOR ref_a_min = - ctx.add(t_import.float_import("/fs/testData/import-MatMul_eightbit_requantize/out/import-MatMul_eightbit_requantize_1.idx")); - TENSOR ref_a_max = - ctx.add(t_import.float_import("/fs/testData/import-MatMul_eightbit_requantize/out/import-MatMul_eightbit_requantize_2.idx")); + ctx.add(t_import.ubyte_import("/fs/testData/import-MatMul_eightbit_requantize/out/import-MatMul_eightbit_requantize_0.idx", "ref_a_q")); + ctx.add(t_import.float_import("/fs/testData/import-MatMul_eightbit_requantize/out/import-MatMul_eightbit_requantize_1.idx", "ref_a_min")); + ctx.add(t_import.float_import("/fs/testData/import-MatMul_eightbit_requantize/out/import-MatMul_eightbit_requantize_2.idx", "ref_a_max")); // modify the checks below: - TENSOR a_q = ctx.add(new RamTensor(ref_a_q.lock()->getShape())); - TENSOR a_min_q = 
ctx.add(new RamTensor(ref_a_min.lock()->getShape())); - TENSOR a_max_q = ctx.add(new RamTensor(ref_a_max.lock()->getShape())); - TList inputs = {a, a_min, a_max, r_a_min, r_a_max}; - TList outputs = {a_q, a_min_q, a_max_q}; - S_TENSOR ref_val = ref_a_q.lock(); - S_TENSOR ref_min = ref_a_min.lock(); - S_TENSOR ref_max = ref_a_max.lock(); - S_TENSOR out_val = a_q.lock(); - S_TENSOR out_min = a_min_q.lock(); - S_TENSOR out_max = a_max_q.lock(); + ctx.add(new RamTensor(ctx.get("ref_a_q")->getShape(), "a_q")); + ctx.add(new RamTensor(ctx.get("ref_a_min")->getShape(), "a_min_q")); + ctx.add(new RamTensor(ctx.get("ref_a_max")->getShape(), "a_max_q")); + + S_TENSOR ref_val = ctx.get("ref_a_q"); + S_TENSOR ref_min = ctx.get("ref_a_min"); + S_TENSOR ref_max = ctx.get("ref_a_max"); + S_TENSOR out_val = ctx.get("a_q"); + S_TENSOR out_min = ctx.get("a_min_q"); + S_TENSOR out_max = ctx.get("a_max_q"); // Implementation goes here timer_start(); - ctx.push(new RequantizeOp(), inputs, outputs); + ctx.push(new RequantizeOp(), {"a", "a_min", "a_max", "r_a_min", "r_a_max"}, {"a_q", "a_min_q", "a_max_q"}); ctx.eval(); timer_stop(); double result; - if((result = meanPercentErr(ref_val.get(), out_val.get())) != 0) { + if((result = meanPercentErr(ctx.get("ref_a_q").get(), out_val.get())) != 0) { printf("Requantize a_q failed (%.6f)\r\n", result); unsigned char* ref_ptr = ref_val.get()->write(0, 0); unsigned char* test_ptr = out_val.get()->write(0, 0); @@ -173,33 +155,33 @@ class MathOpsTest : public Test { void argmaxTest(void) { // NT: WIP do not use t_import int 64 here testStart("argmax"); + ctx.gc(); + // reference inputs - TENSOR ref_a = ctx.add(t_import.float_import("/fs/testData/ArgMax/in/ArgMax-input_0.idx")); - TENSOR ref_dim = ctx.add(t_import.int_import("/fs/testData/ArgMax/in/ArgMax-dimension_0.idx")); + ctx.add(t_import.float_import("/fs/testData/ArgMax/in/ArgMax-input_0.idx", "ref_a")); + ctx.add(t_import.int_import("/fs/testData/ArgMax/in/ArgMax-dimension_0.idx", 
"ref_dim")); // reference outputs /// NT: FIXME: argmax outputs int64 tensor which isn't supported by /// int_import. - TENSOR ref_out = ctx.add(t_import.float_import("/fs/testData/ArgMax/out/ArgMax_0.idx")); + S_TENSOR ref_out = ctx.add(t_import.float_import("/fs/testData/ArgMax/out/ArgMax_0.idx", "ref_out")); // Implementation goes here // modify the checks below: - TENSOR out = ctx.add(new RamTensor(ref_out.lock()->getShape())); + S_TENSOR out = ctx.add(new RamTensor(ref_out->getShape(), "out")); - TList inputs = {ref_a, ref_dim}; - TList outputs = {out}; + TNameList inputs = {"ref_a", "ref_dim"}; + TNameList outputs = {"out"}; - S_TENSOR ref_val = ref_out.lock(); - S_TENSOR out_val = out.lock(); timer_start(); ctx.push(new ArgMaxOp(), inputs, outputs); ctx.eval(); timer_stop(); - Tensor* out_float = TensorCast(out_val.get()); + Tensor* out_float = TensorCast(out.get(), "out_float"); ///NT: /WIP how to handle the name? - double result = meanPercentErr(ref_val.get(), out_float); + double result = meanPercentErr(ref_out.get(), out_float); // passed(result < 0.0001); passed(result == 0); @@ -207,35 +189,36 @@ class MathOpsTest : public Test { void argmaxTest2(void) { // NT: WIP do not use t_import int 64 here testStart("argmax2"); - TENSOR test_input = ctx.add(TensorConstant({10, 5}, 0.0f)); - *(test_input.lock()->write(25, 0)) = 1.0f; - *(test_input.lock()->write(26, 0)) = 1.0f; - *(test_input.lock()->write(7, 0)) = 1.0f; - *(test_input.lock()->write(48, 0)) = 1.0f; - *(test_input.lock()->write(14, 0)) = 1.0f; - - TENSOR test_dim = ctx.add(new RamTensor({1})); - *(test_dim.lock()->write(0, 0)) = 0; - - TENSOR test_out_ref = ctx.add(new RamTensor({5})); - *(test_out_ref.lock()->write(0, 0)) = 5.0f; - *(test_out_ref.lock()->write(1, 0)) = 5.0f; - *(test_out_ref.lock()->write(2, 0)) = 1.0f; - *(test_out_ref.lock()->write(3, 0)) = 9.0f; - *(test_out_ref.lock()->write(4, 0)) = 2.0f; - - TENSOR test_out = ctx.add(new RamTensor(test_out_ref.lock()->getShape())); - TList 
inputs = {test_input, test_dim}; - TList outputs = {test_out}; - S_TENSOR ref_val = test_out_ref.lock(); - S_TENSOR out_val = test_out.lock(); + + ctx.gc(); + + S_TENSOR test_input = ctx.add(TensorConstant({10, 5}, 0.0f, "test_input")); + *(test_input->write(25, 0)) = 1.0f; + *(test_input->write(26, 0)) = 1.0f; + *(test_input->write(7, 0)) = 1.0f; + *(test_input->write(48, 0)) = 1.0f; + *(test_input->write(14, 0)) = 1.0f; + + S_TENSOR test_dim = ctx.add(new RamTensor({1}, "test_dim")); + *(test_dim->write(0, 0)) = 0; + + S_TENSOR test_out_ref = ctx.add(new RamTensor({5}, "test_out_ref")); + *(test_out_ref->write(0, 0)) = 5.0f; + *(test_out_ref->write(1, 0)) = 5.0f; + *(test_out_ref->write(2, 0)) = 1.0f; + *(test_out_ref->write(3, 0)) = 9.0f; + *(test_out_ref->write(4, 0)) = 2.0f; + + S_TENSOR test_out = ctx.add(new RamTensor(test_out_ref->getShape(), "test_out")); + TNameList inputs = {"test_input", "test_dim"}; + TNameList outputs = {"test_out"}; timer_start(); ctx.push(new ArgMaxOp(), inputs, outputs); ctx.eval(); timer_stop(); - double result = meanPercentErr(ref_val.get(), out_val.get()); + double result = meanPercentErr(test_out_ref.get(), test_out.get()); passed(result < 0.0001); //passed(result == 0); } @@ -244,29 +227,24 @@ class MathOpsTest : public Test { testStart("add"); // reference inputs - TENSOR a = - ctx.add(t_import.float_import("/fs/testData/ref_add/in/Const_5_0.idx")); - TENSOR b = - ctx.add(t_import.float_import("/fs/testData/ref_add/in/Const_6_0.idx")); + ctx.add(t_import.float_import("/fs/testData/ref_add/in/Const_5_0.idx", "a")); + ctx.add(t_import.float_import("/fs/testData/ref_add/in/Const_6_0.idx", "b")); // reference outputs - TENSOR ref_out = - ctx.add(t_import.float_import("/fs/testData/ref_add/out/ref_add_0.idx")); + S_TENSOR ref_out = ctx.add(t_import.float_import("/fs/testData/ref_add/out/ref_add_0.idx", "ref_out")); // Implementation goes here // modify the checks below: - TENSOR out = ctx.add(new 
RamTensor(ref_out.lock()->getShape())); - S_TENSOR out_vxx = out.lock(); - S_TENSOR ref_vxx = ref_out.lock(); - TList inputs = {a, b}; - TList outputs = {out}; + S_TENSOR out = ctx.add(new RamTensor(ref_out->getShape(), "out")); + TNameList inputs = {"a", "b"}; + TNameList outputs = {"out"}; timer_start(); ctx.push(new AddOp(), inputs, outputs); ctx.eval(); timer_stop(); - double result = meanPercentErr(ref_vxx.get(), out_vxx.get()); + double result = meanPercentErr(ref_out.get(), out.get()); passed(result < 0.0001); //passed(result == 0); } @@ -274,30 +252,28 @@ class MathOpsTest : public Test { void minTest(void) { testStart("min"); + ctx.gc(); + // reference inputs - TENSOR a = - ctx.add(t_import.float_import("/fs/testData/ref_min/in/Const_2_0.idx")); - TENSOR dim = - ctx.add(t_import.int_import("/fs/testData/ref_min/in/Const_3_0.idx")); + ctx.add(t_import.float_import("/fs/testData/ref_min/in/Const_2_0.idx", "a")); + ctx.add(t_import.int_import("/fs/testData/ref_min/in/Const_3_0.idx", "dim")); // reference outputs - TENSOR ref_out = - ctx.add(t_import.float_import("/fs/testData/ref_min/out/ref_min_0.idx")); + S_TENSOR ref_out = ctx.add(t_import.float_import("/fs/testData/ref_min/out/ref_min_0.idx", "ref_out")); // Implementation goes here // modify the checks below: - TENSOR out = ctx.add(new RamTensor(ref_out.lock()->getShape())); - TList inputs = {a, dim}; - TList outputs = {out}; - S_TENSOR ref_val = ref_out.lock(); - S_TENSOR out_val = out.lock(); + S_TENSOR out = ctx.add(new RamTensor(ref_out->getShape(), "out")); + TNameList inputs = {"a", "dim"}; + TNameList outputs = {"out"}; + timer_start(); ctx.push(new MinOp(), inputs, outputs); ctx.eval(); timer_stop(); - double result = meanPercentErr(ref_val.get(), out_val.get()); + double result = meanPercentErr(ref_out.get(), out.get()); // passed(result < 0.0001); passed(result == 0); } @@ -305,30 +281,27 @@ class MathOpsTest : public Test { void maxTest(void) { testStart("max"); + ctx.gc(); + // reference 
inputs - TENSOR a = - ctx.add(t_import.float_import("/fs/testData/ref_max/in/Const_2_0.idx")); - TENSOR dim = - ctx.add(t_import.int_import("/fs/testData/ref_max/in/Const_4_0.idx")); + ctx.add(t_import.float_import("/fs/testData/ref_max/in/Const_2_0.idx", "a")); + ctx.add(t_import.int_import("/fs/testData/ref_max/in/Const_4_0.idx", "dim")); // reference outputs - TENSOR ref_out = - ctx.add(t_import.float_import("/fs/testData/ref_max/out/ref_max_0.idx")); + S_TENSOR ref_out = ctx.add(t_import.float_import("/fs/testData/ref_max/out/ref_max_0.idx", "ref_out")); // Implementation goes here // modify the checks below: - TENSOR out = ctx.add(new RamTensor(ref_out.lock()->getShape())); - TList inputs = {a, dim}; - TList outputs = {out}; - S_TENSOR ref_val = ref_out.lock(); - S_TENSOR out_val = out.lock(); + S_TENSOR out = ctx.add(new RamTensor(ref_out->getShape(), "out")); + TNameList inputs = {"a", "dim"}; + TNameList outputs = {"out"}; timer_start(); ctx.push(new MaxOp(), inputs, outputs); ctx.eval(); timer_stop(); - double result = meanPercentErr(ref_val.get(), out_val.get()); + double result = meanPercentErr(ref_out.get(), out.get()); // passed(result < 0.0001); passed(result == 0); } diff --git a/MatrixTests.hpp b/MatrixTests.hpp index d72815a9..b42fa0c4 100644 --- a/MatrixTests.hpp +++ b/MatrixTests.hpp @@ -13,59 +13,46 @@ class matrixOpsTest : public Test { void qMatMul(void) { testStart("Quantized Matrix Mul"); + + ctx.gc(); + //inputs - TENSOR a = - ctx.add(t_import.ubyte_import("/fs/testData/qMatMul/in/qA_0.idx")); - TENSOR a_min = - ctx.add(t_import.float_import("/fs/testData/qMatMul/in/qA_1.idx")); - TENSOR a_max = - ctx.add(t_import.float_import("/fs/testData/qMatMul/in/qA_2.idx")); - TENSOR b = - ctx.add(t_import.ubyte_import("/fs/testData/qMatMul/in/qB_0.idx")); - TENSOR b_min = - ctx.add(t_import.float_import("/fs/testData/qMatMul/in/qB_1.idx")); - TENSOR b_max = - ctx.add(t_import.float_import("/fs/testData/qMatMul/in/qB_2.idx")); + 
ctx.add(t_import.ubyte_import("/fs/testData/qMatMul/in/qA_0.idx", "a")); + ctx.add(t_import.float_import("/fs/testData/qMatMul/in/qA_1.idx", "a_min")); + ctx.add(t_import.float_import("/fs/testData/qMatMul/in/qA_2.idx", "a_max")); + ctx.add(t_import.ubyte_import("/fs/testData/qMatMul/in/qB_0.idx", "b")); + ctx.add(t_import.float_import("/fs/testData/qMatMul/in/qB_1.idx", "b_min")); + ctx.add(t_import.float_import("/fs/testData/qMatMul/in/qB_2.idx", "b_max")); // reference outputs - TENSOR c = - ctx.add(t_import.int_import("/fs/testData/qMatMul/out/qMatMul_0.idx")); - TENSOR c_min = - ctx.add(t_import.float_import("/fs/testData/qMatMul/out/qMatMul_1.idx")); - TENSOR c_max = - ctx.add(t_import.float_import("/fs/testData/qMatMul/out/qMatMul_2.idx")); + S_TENSOR c = ctx.add(t_import.int_import("/fs/testData/qMatMul/out/qMatMul_0.idx", "c")); + S_TENSOR c_min = ctx.add(t_import.float_import("/fs/testData/qMatMul/out/qMatMul_1.idx", "c_min")); + S_TENSOR c_max = ctx.add(t_import.float_import("/fs/testData/qMatMul/out/qMatMul_2.idx", "c_max")); //we need default constructor here //so we can get ride of the shapes here - TENSOR out_c = ctx.add(new RamTensor(c.lock()->getShape())); - TENSOR out_min = ctx.add(new RamTensor(c_min.lock()->getShape())); - TENSOR out_max = ctx.add(new RamTensor(c_max.lock()->getShape())); + S_TENSOR out_c = ctx.add(new RamTensor(c->getShape(), "out_c")); + S_TENSOR out_min = ctx.add(new RamTensor(c_min->getShape(), "out_min")); + S_TENSOR out_max = ctx.add(new RamTensor(c_max->getShape(), "out_max")); //TList inputs = {a, a_min, a_max, b, b_min, b_max}; //TList outputs = {out_c, out_min, out_max}; //if you want tensors to be alive after .eval() //copies of the share_pointer needs to be here - S_TENSOR ref_c_rptr = c.lock(); - S_TENSOR ref_min_rptr = c_min.lock(); - S_TENSOR ref_max_rptr = c_max.lock(); - S_TENSOR out_c_rptr = out_c.lock(); - S_TENSOR out_min_rptr = out_min.lock(); - S_TENSOR out_max_rptr = out_max.lock(); - timer_start(); 
//ctx.push(new QntMatMulOp(), inputs, outputs); ctx.push(new QntMatMulOp(), - {a, a_min, a_max, b, b_min, b_max}, - {out_c, out_min, out_max}); + {"a", "a_min", "a_max", "b", "b_min", "b_max"}, + {"out_c", "out_min", "out_max"}); ctx.eval(); timer_stop(); - double result = meanPercentErr(ref_c_rptr.get(), out_c_rptr.get()) + meanPercentErr(ref_min_rptr.get(), out_min_rptr.get()) + - meanPercentErr(ref_max_rptr.get(), out_max_rptr.get()); + double result = meanPercentErr(c.get(), out_c.get()) + meanPercentErr(c_min.get(), out_min.get()) + + meanPercentErr(c_max.get(), out_max.get()); passed(result == 0); } diff --git a/NnTests.hpp b/NnTests.hpp index ad867a77..e77f603b 100644 --- a/NnTests.hpp +++ b/NnTests.hpp @@ -13,45 +13,35 @@ class NnOpsTest : public Test { void reluTest(void) { testStart("quantized_relu"); // reference inputs - TENSOR a = - ctx.add(t_import.ubyte_import("/fs/testData/ref_qRelu/in/QuantizeV2_0.idx")); - TENSOR min = - ctx.add(t_import.float_import("/fs/testData/ref_qRelu/in/QuantizeV2_1.idx")); - TENSOR max = - ctx.add(t_import.float_import("/fs/testData/ref_qRelu/in/QuantizeV2_2.idx")); + S_TENSOR a = + ctx.add(t_import.ubyte_import("/fs/testData/ref_qRelu/in/QuantizeV2_0.idx", "a")); + S_TENSOR min = + ctx.add(t_import.float_import("/fs/testData/ref_qRelu/in/QuantizeV2_1.idx", "min")); + S_TENSOR max = + ctx.add(t_import.float_import("/fs/testData/ref_qRelu/in/QuantizeV2_2.idx", "max")); // reference outputs - TENSOR ref_out = - ctx.add(t_import.ubyte_import("/fs/testData/ref_qRelu/out/ref_qRelu_0.idx")); - TENSOR ref_min = - ctx.add(t_import.float_import("/fs/testData/ref_qRelu/out/ref_qRelu_1.idx")); - TENSOR ref_max = - ctx.add(t_import.float_import("/fs/testData/ref_qRelu/out/ref_qRelu_2.idx")); + S_TENSOR ref_out = + ctx.add(t_import.ubyte_import("/fs/testData/ref_qRelu/out/ref_qRelu_0.idx", "ref_out")); + S_TENSOR ref_min = + ctx.add(t_import.float_import("/fs/testData/ref_qRelu/out/ref_qRelu_1.idx", "ref_min")); + S_TENSOR ref_max = + 
ctx.add(t_import.float_import("/fs/testData/ref_qRelu/out/ref_qRelu_2.idx", "ref_max")); // modify the checks below: - TENSOR out = ctx.add(new RamTensor(ref_out.lock()->getShape())); - TENSOR out_min = ctx.add(new RamTensor(ref_min.lock()->getShape())); - TENSOR out_max = ctx.add(new RamTensor(ref_max.lock()->getShape())); + S_TENSOR out = ctx.add(new RamTensor(ref_out->getShape(), "out")); + S_TENSOR out_min = ctx.add(new RamTensor(ref_min->getShape(), "out_min")); + S_TENSOR out_max = ctx.add(new RamTensor(ref_max->getShape(), "out_max")); - //lock on to required output tensors - S_TENSOR ref_out_s = ref_out.lock(); - S_TENSOR ref_min_s = ref_min.lock(); - S_TENSOR ref_max_s = ref_max.lock(); - S_TENSOR out_s = out.lock(); - S_TENSOR out_min_s = out_min.lock(); - S_TENSOR out_max_s = out_max.lock(); - - TList inputs = {a, min, max}; - TList outputs = {out, out_min, out_max}; timer_start(); - ctx.push(new ReluOp(), inputs, outputs); + ctx.push(new ReluOp(), {"a", "min", "max"}, {"out", "out_min", "out_max"}); ctx.eval(); timer_stop(); - double result = meanPercentErr(ref_out_s.get(), out_s.get()) + - meanPercentErr(ref_min_s.get(), out_min_s.get()) + - meanPercentErr(ref_max_s.get(), out_max_s.get()); + double result = meanPercentErr(ref_out.get(), out.get()) + + meanPercentErr(ref_min.get(), out_min.get()) + + meanPercentErr(ref_max.get(), out_max.get()); // passed(result < 0.0001); passed(result == 0); } diff --git a/README.md b/README.md index fec3ef50..d81149a0 100644 --- a/README.md +++ b/README.md @@ -3,9 +3,14 @@ ## Introduction - uTensor is an extremely light-weight Deep-Learning Inference framework built on mbed and Tensorflow. + uTensor is an extremely light-weight Deep-Learning Inference framework built on Mbed and Tensorflow: + + - TensorFlow to uTensor exporter (planned) + - Tensor Classes + - Operators Classes + - Context, a resource and graph management class - This project is under going constant development. 
+ This project is undergoing constant development. APIs are expected to update rapidly. ## Requirement @@ -55,6 +60,21 @@ prediction: 7 Currently, the binary runs the first sample of the [MNIST dataset](http://yann.lecun.com/exdb/mnist/) which contains a handwritten digit of number 7. Ths network architecture is a 3-layer Relu based MLP, as shown below: ![alt text](https://raw.githubusercontent.com/dmlc/web-data/master/mxnet/image/mlp_mnist.png "mxnet Handwritten Digit Recognition") + + For the related TensorFlow training script, please refer to the [node-viewer](https://github.com/neil-tan/tf-node-viewer/blob/master/deep_mlp.py) project. + +## Exporting to uTensor + + At this time, exporting models to uTensor requires the [node-viewer](https://github.com/neil-tan/tf-node-viewer/) project. This tool is designed to view and navigate TensorFlow graphs and export tensors to idx files. The idx files are then used by uTensor. For further instruction, please refer to this [wiki-page](https://github.com/neil-tan/uTensor/wiki/Tensorflow-model-exporting-note). + + uTensor-CLI is a tool planned for the next releases. It would streamline the process of moving TensorFlow models to the uTensor environment. + +## Reference + +- [TensorFlow](https://www.tensorflow.org) +- [Mbed](https://developer.mbed.org) +- [Node-Viewer](https://github.com/neil-tan/tf-node-viewer/) +- [How to Quantize Neural Networks with TensorFlow](https://petewarden.com/2016/05/03/how-to-quantize-neural-networks-with-tensorflow/) +- [mxnet Handwritten Digit Recognition](https://mxnet.incubator.apache.org/tutorials/python/mnist.html) + - - For the related TensorFlow training script, please refer to the [node-viewer](https://github.com/neil-tan/tf-node-viewer/blob/master/deep_mlp.py) project. 
diff --git a/context.cpp b/context.cpp index 357c4fe7..ddaebbca 100644 --- a/context.cpp +++ b/context.cpp @@ -1,8 +1,9 @@ #include "context.hpp" -TENSOR Context::add(Tensor* t, uint8_t init_count) { - if(rTable.find(t) != rTable.end()) { - ERR_EXIT("tensor pointer address already exist in rTable"); +S_TENSOR Context::add(Tensor* t, uint8_t init_count) { + if(t == nullptr) { ERR_EXIT("null pointer tensor"); } + if(rTable.find(t->getName()) != rTable.end()) { + ERR_EXIT("tensor with name \"%s\" address already exist in rTable", t->getName().c_str()); } S_TENSOR _sptr(t); @@ -13,28 +14,46 @@ TENSOR Context::add(Tensor* t, uint8_t init_count) { record.count = init_count; record.allow_incr = false; } + record.sptr = _sptr; - rTable[t] = record; + rTable[t->getName()] = record; - TENSOR wptr = _sptr; + return _sptr; +} - return wptr; +S_TENSOR Context::get(TName const &t_name) { + if(rTable.find(t_name) == rTable.end()) ERR_EXIT("No tensor with name: %s", t_name.c_str()); + return rTable[t_name].sptr; } -void Context::push(Operator *op, TList &_inputs, TList &_outputs) { +void Context::push(Operator *op, TNameList &in_names, TNameList &out_names) { //error checking in the Op class + S_TList _inputs; + for(auto in:in_names) { + if(rTable.find(in) == rTable.end()) { ERR_EXIT("Tensor \"%s\" not found", in.c_str()); } + Ref_Record r = rTable[in]; + _inputs.push_back(r.sptr); + } + + S_TList _outputs; + for(auto out:out_names) { + if(rTable.find(out) == rTable.end()) { ERR_EXIT("Tensor \"%s\" not found", out.c_str()); } + Ref_Record r = rTable[out]; + _outputs.push_back(r.sptr); + } + op->setInputs(_inputs); op->setOutputs(_outputs); op_list.push_back(op); - incrTListRef(_inputs); + incrTNameListRef(in_names); } -void Context::push(Operator *op, std::initializer_list _inputs, std::initializer_list _outputs) { - TList inputs; - TList outputs; +void Context::push(Operator *op, std::initializer_list _inputs, std::initializer_list _outputs) { + TNameList inputs; + TNameList 
outputs; for(auto i:_inputs) { inputs.push_back(i); @@ -47,17 +66,16 @@ void Context::push(Operator *op, std::initializer_list _inputs, std::ini push(op, inputs, outputs); } -void Context::incrTListRef(const TList &t_list) { - for(auto t:t_list) { - Tensor* ptr = t.lock().get(); - if(rTable.find(ptr) == rTable.end()) { +void Context::incrTNameListRef(const TNameList &t_list) { + for(auto t_name:t_list) { + if(rTable.find(t_name) == rTable.end()) { ERR_EXIT("tensor not registered"); } - Ref_Record record = rTable[ptr]; + Ref_Record record = rTable[t_name]; if(record.allow_incr) { record.count++; - rTable[ptr] = record; + rTable[t_name] = record; } //if an initial ref value is supplied to the tensor at compile time @@ -79,34 +97,34 @@ void Context::deinitTensors(const S_TList &t_list) { } } -void Context::delTensor(Tensor* t) { - Ref_Record record = rTable[t]; +void Context::delTensor(TName t_name) { + Ref_Record record = rTable[t_name]; record.sptr.reset(); - rTable.erase(t); + rTable.erase(t_name); } void Context::dcrListRef(S_TList t_list) { for(auto t:t_list) { - if(dcrRef(t.get()) < 1) { - delTensor(t.get()); + if(dcrRef(t->getName()) < 1) { + delTensor(t->getName()); } } } -uint8_t Context::dcrRef(Tensor* t) { - if(!isTracked(t)) { +uint8_t Context::dcrRef(TName t_name) { + if(!isTracked(t_name)) { ERR_EXIT("Tensor not registered"); } - Ref_Record record = rTable[t]; + Ref_Record record = rTable[t_name]; if(record.count > 0) record.count -= 1; - rTable[t] = record; + rTable[t_name] = record; return record.count; } -bool Context::isTracked(Tensor* t) { - return (rTable.find(t) != rTable.end()); +bool Context::isTracked(TName t_name) { + return (rTable.find(t_name) != rTable.end()); } int Context::eval(void) { @@ -132,4 +150,22 @@ int Context::eval(void) { op_list.clear(); return 0; -} \ No newline at end of file +} + +uint32_t Context::gc(void) { + TNameList nlist; + + for ( auto it : rTable) { + Ref_Record r = it.second; + if(r.count < 1) { + 
nlist.push_back(it.first); + } + } + + for(auto name:nlist) { + delTensor(name); + } + + return (uint32_t) nlist.size(); +} + diff --git a/context.hpp b/context.hpp index ce715a9a..15076833 100644 --- a/context.hpp +++ b/context.hpp @@ -26,7 +26,7 @@ class Context : public uTensor { std::vector op_list; bool del_onsight; - std::unordered_map rTable; //all tensors alive //kill all unused if malloc failed? + std::unordered_map rTable; //all tensors alive //kill all unused if malloc failed? //uint32_t m_size; //remaining memory size //void registerTensor(Tensor* t); //void gc(void); //garbage collector, delete any tracked unreferenced tensor @@ -34,18 +34,21 @@ class Context : public uTensor { void initTensors(const S_TList &t_list); void deinitTensors(const S_TList &t_list); - void incrTListRef(const TList &t_list); + void incrTNameListRef(const TNameList &t_list); void dcrListRef(S_TList t_list); - void delTensor(Tensor* t); + void delTensor(TName t); //uint16_t incrRef(std::shared_ptr sptr); - uint8_t dcrRef(Tensor* t); - bool isTracked(Tensor* t); + uint8_t dcrRef(TName name); + bool isTracked(TName name); + //bool isTracked(Tensor* t); //uint16_t getRef(); public: - TENSOR add(Tensor* t, uint8_t init_count = 0); - void push(Operator *op, TList &_inputs, TList &_outputs); - void push(Operator *op, std::initializer_list _inputs, std::initializer_list _outputs); + S_TENSOR add(Tensor* t, uint8_t init_count = 0); + S_TENSOR get(TName const &t_name); + void push(Operator *op, TNameList &_inputs, TNameList &_outputs); + void push(Operator *op, std::initializer_list _inputs, std::initializer_list _outputs); + uint32_t gc(void); int eval(void); Context() { diff --git a/context_test.hpp b/context_test.hpp index 5d1ff4c2..607628ed 100644 --- a/context_test.hpp +++ b/context_test.hpp @@ -18,99 +18,45 @@ class contextTest : public Test { Context ctx; public: - -// void MatMalTest(void) { -// testStart("Context QntMatMal Op"); -// //inputs -// TENSOR a = -// 
ctx.add(t_import.ubyte_import("/fs/testData/qMatMul/in/qA_0.idx")); -// TENSOR a_min = -// ctx.add(t_import.float_import("/fs/testData/qMatMul/in/qA_1.idx")); -// TENSOR a_max = -// ctx.add(t_import.float_import("/fs/testData/qMatMul/in/qA_2.idx")); -// TENSOR b = -// ctx.add(t_import.ubyte_import("/fs/testData/qMatMul/in/qB_0.idx")); -// TENSOR b_min = -// ctx.add(t_import.float_import("/fs/testData/qMatMul/in/qB_1.idx")); -// TENSOR b_max = -// ctx.add(t_import.float_import("/fs/testData/qMatMul/in/qB_2.idx")); - -// // reference outputs -// TENSOR c = -// ctx.add(t_import.int_import("/fs/testData/qMatMul/out/qMatMul_0.idx")); -// TENSOR c_min = -// ctx.add(t_import.float_import("/fs/testData/qMatMul/out/qMatMul_1.idx")); -// TENSOR c_max = -// ctx.add(t_import.float_import("/fs/testData/qMatMul/out/qMatMul_2.idx")); - - -// //we need default constructor here -// //so we can get ride of the shapes here -// TENSOR out_c = ctx.add(new RamTensor(c.lock()->getShape())); -// TENSOR out_min = ctx.add(new RamTensor(c_min.lock()->getShape())); -// TENSOR out_max = ctx.add(new RamTensor(c_max.lock()->getShape())); - -// TList inputs = {a, a_min, a_max, b, b_min, b_max}; -// TList outputs = {out_c, out_min, out_max}; - -// //if you want tensors to be alive after .eval() -// //copies of the share_pointer needs to be here -// S_TENSOR ref_c_rptr = c.lock(); -// S_TENSOR ref_min_rptr = c_min.lock(); -// S_TENSOR ref_max_rptr = c_max.lock(); -// S_TENSOR out_c_rptr = out_c.lock(); -// S_TENSOR out_min_rptr = out_min.lock(); -// S_TENSOR out_max_rptr = out_max.lock(); - - -// timer_start(); -// ctx.push(new QntMatMulOp(), inputs, outputs); -// ctx.eval(); -// timer_stop(); - -// double result = meanPercentErr(ref_c_rptr.get(), out_c_rptr.get()) + meanPercentErr(ref_min_rptr.get(), out_min_rptr.get()) + -// meanPercentErr(ref_max_rptr.get(), out_max_rptr.get()); - -// passed(result == 0); -// } - void RefCountTest(void) { testStart("Context Ref Count"); + + ctx.gc(); + 
timer_start(); //inputs - TENSOR a = ctx.add(new RamTensor({1,1,1})); - TENSOR b = ctx.add(new RamTensor({1,1,1})); - TENSOR c = ctx.add(new RamTensor({1,1,1})); + S_TENSOR a = ctx.add(new RamTensor({1,1,1}, "a")); + S_TENSOR b = ctx.add(new RamTensor({1,1,1}, "b")); + S_TENSOR c = ctx.add(new RamTensor({1,1,1}, "c")); //init values - *(a.lock()->write(0, 0)) = 1; - *(b.lock()->write(0, 0)) = 1; - *(c.lock()->write(0, 0)) = 1; + *(a->write(0, 0)) = 1; + *(b->write(0, 0)) = 1; + *(c->write(0, 0)) = 1; // reference outputs - TENSOR out = ctx.add(new RamTensor({1,1,1})); - S_TENSOR shr_out = out.lock(); + S_TENSOR out = ctx.add(new RamTensor({1,1,1}, "out")); - TList inputs0 = {a, b}; - TList outputs0 = {c}; //2 + TNameList inputs0 = {"a", "b"}; + TNameList outputs0 = {"c"}; //2 ctx.push(new AddOp(), inputs0, outputs0); - TList inputs1 = {c, a}; - TList outputs1 = {b}; //3 + TNameList inputs1 = {"c", "a"}; + TNameList outputs1 = {"b"}; //3 ctx.push(new AddOp(), inputs1, outputs1); - TList inputs2 = {a, b}; - TList outputs2 = {out}; //4 + TNameList inputs2 = {"a", "b"}; + TNameList outputs2 = {"out"}; //4 ctx.push(new AddOp(), inputs2, outputs2); ctx.eval(); timer_stop(); - if(a.lock() || b.lock() || c.lock() || !out.lock()) { + if(a.use_count() != 1 || b.use_count() != 1 || c.use_count() != 1 || out.use_count() != 2) { failed(); return; } - int result = *(shr_out->read(0, 0)); + int result = *(out->read(0, 0)); passed(result == 4); } diff --git a/deep_mnist_mlp.cpp b/deep_mnist_mlp.cpp index 19f262e9..511bff7d 100644 --- a/deep_mnist_mlp.cpp +++ b/deep_mnist_mlp.cpp @@ -1,194 +1,184 @@ #include "deep_mnist_mlp.hpp" -void tensorQuantize(Context& ctx, TENSOR input, TENSOR output, - TENSOR out_min, TENSOR out_max) { +void tensorQuantize(Context& ctx, TName input, TName output, + TName out_min, TName out_max) { //reshape - TENSOR reduce_dim = ctx.add(new RamTensor({1})); - TENSOR reshape_out = ctx.add(new RamTensor()); + S_TENSOR reduce_dim = ctx.add(new RamTensor({1}, 
"reduce_dim")); + S_TENSOR reshape_out = ctx.add(new RamTensor("reshape_out")); - TENSOR reshape_shape = ctx.add(new RamTensor()); + S_TENSOR reshape_shape = ctx.add(new RamTensor("reshape_shape")); - *(reduce_dim.lock()->write(0, 0)) = 0; - ctx.push(new ReshapeOp(), {input, reshape_shape}, {reshape_out}); + *(reduce_dim->write(0, 0)) = 0; + ctx.push(new ReshapeOp(), {input, "reshape_shape"}, {"reshape_out"}); //Min and Max of (reshaped) input - TENSOR min_out = ctx.add(new RamTensor({1})); - TENSOR max_out = ctx.add(new RamTensor({1})); - ctx.push(new MinOp(), {reshape_out, reduce_dim}, {min_out}); - ctx.push(new MaxOp(), {reshape_out, reduce_dim}, {max_out}); + S_TENSOR min_out = ctx.add(new RamTensor({1}, "min_out")); + S_TENSOR max_out = ctx.add(new RamTensor({1}, "max_out")); + ctx.push(new MinOp(), {"reshape_out", "reduce_dim"}, {"min_out"}); + ctx.push(new MaxOp(), {"reshape_out", "reduce_dim"}, {"max_out"}); - ctx.push(new QuantizeV2Op(), {reshape_out, min_out, max_out}, {output, out_min, out_max}); + ctx.push(new QuantizeV2Op(), {"reshape_out", "min_out", "max_out"}, {output, out_min, out_max}); } -void ReluLayer(Context& ctx, TENSOR x, TENSOR x_min, TENSOR x_max, - TENSOR w, TENSOR w_min, TENSOR w_max, TENSOR b, - TENSOR z_output) { +void ReluLayer(Context& ctx, TName x, TName x_min, TName x_max, + TName w, TName w_min, TName w_max, TName b, + TName z_output) { //quantized matmul - TENSOR out_c = ctx.add(new RamTensor()); + S_TENSOR out_c = ctx.add(new RamTensor("out_c")); - TENSOR matmul_out_min = ctx.add(new RamTensor({1})); - TENSOR matmul_out_max = ctx.add(new RamTensor({1})); + S_TENSOR matmul_out_min = ctx.add(new RamTensor({1}, "matmul_out_min")); + S_TENSOR matmul_out_max = ctx.add(new RamTensor({1}, "matmul_out_max")); - ctx.push(new QntMatMulOp(), {x, x_min, x_max, w, w_min, w_max}, {out_c, matmul_out_min, matmul_out_max}); + ctx.push(new QntMatMulOp(), {x, x_min, x_max, w, w_min, w_max}, {"out_c", "matmul_out_min", "matmul_out_max"}); 
//Requantization_Range - TENSOR req_out_min = ctx.add(new RamTensor({1})); - TENSOR req_out_max = ctx.add(new RamTensor({1})); - ctx.push(new Requantization_RangeOp(), {out_c, matmul_out_min, matmul_out_max}, {req_out_min, req_out_max}); + S_TENSOR req_out_min = ctx.add(new RamTensor({1}, "req_out_min")); + S_TENSOR req_out_max = ctx.add(new RamTensor({1}, "req_out_max")); + ctx.push(new Requantization_RangeOp(), {"out_c", "matmul_out_min", "matmul_out_max"}, {"req_out_min", "req_out_max"}); //Requantize - TENSOR reqnt_out = ctx.add(new RamTensor()); - TENSOR reqnt_out_min = ctx.add(new RamTensor({1})); - TENSOR reqnt_out_max = ctx.add(new RamTensor({1})); - ctx.push(new RequantizeOp(), {out_c, matmul_out_min, matmul_out_max, req_out_min, req_out_max}, {reqnt_out, reqnt_out_min, reqnt_out_max}); + S_TENSOR reqnt_out = ctx.add(new RamTensor("reqnt_out")); + S_TENSOR reqnt_out_min = ctx.add(new RamTensor({1}, "reqnt_out_min")); + S_TENSOR reqnt_out_max = ctx.add(new RamTensor({1}, "reqnt_out_max")); + ctx.push(new RequantizeOp(), {"out_c", "matmul_out_min", "matmul_out_max", "req_out_min", "req_out_max"}, {"reqnt_out", "reqnt_out_min", "reqnt_out_max"}); - Shape out_shape = out_c.lock()->getShape(); + Shape out_shape = out_c->getShape(); //clean up - TENSOR deqnt_out = ctx.add(new RamTensor()); - ctx.push(new DequantizeOp(), {reqnt_out, reqnt_out_min, reqnt_out_max}, {deqnt_out}); + S_TENSOR deqnt_out = ctx.add(new RamTensor("deqnt_out")); + ctx.push(new DequantizeOp(), {"reqnt_out", "reqnt_out_min", "reqnt_out_max"}, {"deqnt_out"}); - ctx.push(new AddOp(), {deqnt_out, b}, {z_output}); + ctx.push(new AddOp(), {"deqnt_out", b}, {z_output}); } -void PredLayer(Context &ctx, TENSOR input, TENSOR input_min, - TENSOR input_max, TENSOR output, TENSOR w, TENSOR w_min, TENSOR w_max, TENSOR bias, TENSOR dim) { +void PredLayer(Context &ctx, TName input, TName input_min, + TName input_max, TName output, TName w, TName w_min, TName w_max, TName bias, TName dim) { - TENSOR out_c = 
ctx.add(new RamTensor()); - TENSOR matmul_out_min = ctx.add(new RamTensor({1})); - TENSOR matmul_out_max = ctx.add(new RamTensor({1})); + S_TENSOR out_mat_pred = ctx.add(new RamTensor("out_mat_pred")); + S_TENSOR matmul_out_min_pred = ctx.add(new RamTensor({1}, "matmul_out_min_pred")); + S_TENSOR matmul_out_max_pred = ctx.add(new RamTensor({1}, "matmul_out_max_pred")); //MatMul ctx.push(new QntMatMulOp(), {input, input_min, input_max, w, w_min, w_max}, - {out_c, matmul_out_min, matmul_out_max}); + {"out_mat_pred", "matmul_out_min_pred", "matmul_out_max_pred"}); //Requantization_Range - TENSOR req_out_min = ctx.add(new RamTensor({1})); - TENSOR req_out_max = ctx.add(new RamTensor({1})); - ctx.push(new Requantization_RangeOp(), {out_c, matmul_out_min, matmul_out_max}, - {req_out_min, req_out_max}); + S_TENSOR req_out_min = ctx.add(new RamTensor({1}, "req_out_min_pred")); + S_TENSOR req_out_max = ctx.add(new RamTensor({1}, "req_out_max_pred")); + ctx.push(new Requantization_RangeOp(), {"out_mat_pred", "matmul_out_min_pred", "matmul_out_max_pred"}, + {"req_out_min_pred", "req_out_max_pred"}); //Requantize - TENSOR reqnt_out = ctx.add(new RamTensor()); - TENSOR reqnt_out_min = ctx.add(new RamTensor({1})); - TENSOR reqnt_out_max = ctx.add(new RamTensor({1})); - ctx.push(new RequantizeOp(), {out_c, matmul_out_min, matmul_out_max, req_out_min, req_out_max}, - {reqnt_out, reqnt_out_min, reqnt_out_max}); + S_TENSOR reqnt_out = ctx.add(new RamTensor("reqnt_out_pred")); + S_TENSOR reqnt_out_min = ctx.add(new RamTensor({1}, "reqnt_out_min_pred")); + S_TENSOR reqnt_out_max = ctx.add(new RamTensor({1}, "reqnt_out_max_pred")); + ctx.push(new RequantizeOp(), {"out_mat_pred", "matmul_out_min_pred", "matmul_out_max_pred", "req_out_min_pred", "req_out_max_pred"}, + {"reqnt_out_pred", "reqnt_out_min_pred", "reqnt_out_max_pred"}); //dequantize - TENSOR deqnt_out = ctx.add(new RamTensor()); - ctx.push(new DequantizeOp(), {reqnt_out, reqnt_out_min, reqnt_out_max}, {deqnt_out}); + S_TENSOR 
deqnt_out = ctx.add(new RamTensor("deqnt_out_pred")); + ctx.push(new DequantizeOp(), {"reqnt_out_pred", "reqnt_out_min_pred", "reqnt_out_max_pred"}, {"deqnt_out_pred"}); //Add - TENSOR output_z = ctx.add(new RamTensor()); - ctx.push(new AddOp(), {deqnt_out, bias}, {output_z}); + S_TENSOR output_z = ctx.add(new RamTensor("output_z_pred")); + ctx.push(new AddOp(), {"deqnt_out_pred", bias}, {"output_z_pred"}); //ArgMax - ctx.push(new ArgMaxOp(), {output_z, dim}, {output}); + ctx.push(new ArgMaxOp(), {"output_z_pred", dim}, {output}); } int runMLP(string inputIdxFile) { TensorIdxImporter t_import; Context ctx; - TENSOR x_quantized = ctx.add(new RamTensor()); - TENSOR x_min = ctx.add(new RamTensor({1})); - TENSOR x_max = ctx.add(new RamTensor({1})); - TENSOR x = ctx.add(t_import.float_import(inputIdxFile)); - S_TENSOR xs_quantized = x_quantized.lock(); - S_TENSOR xs_min = x_min.lock(); - S_TENSOR xs_max = x_max.lock(); - - tensorQuantize(ctx, x, x_quantized, x_min, x_max); + S_TENSOR x_quantized = ctx.add(new RamTensor("x_quantized")); + S_TENSOR x_min = ctx.add(new RamTensor({1}, "x_min")); + S_TENSOR x_max = ctx.add(new RamTensor({1}, "x_max")); + S_TENSOR x = ctx.add(t_import.float_import(inputIdxFile, "x")); + + tensorQuantize(ctx, "x", "x_quantized", "x_min", "x_max"); ctx.eval(); //relu layer first - TENSOR w = ctx.add(t_import.ubyte_import( - "/fs/testData/deep_mlp/import-Variable_quint8_const_0.idx")); - TENSOR w_min = - ctx.add(t_import.float_import("/fs/testData/deep_mlp/import-Variable_min_0.idx")); - TENSOR w_max = - ctx.add(t_import.float_import("/fs/testData/deep_mlp/import-Variable_max_0.idx")); - TENSOR b = - ctx.add(t_import.float_import("/fs/testData/deep_mlp/import-Variable_1_0.idx")); - TENSOR relu_output = ctx.add(new RamTensor()); - TENSOR relu_min = ctx.add(new RamTensor({1})); - TENSOR relu_max = ctx.add(new RamTensor({1})); - S_TENSOR relus_output = relu_output.lock(); - S_TENSOR relus_min = relu_min.lock(); - S_TENSOR relus_max = 
relu_max.lock(); - TENSOR z_output = ctx.add(new RamTensor()); - - ReluLayer(ctx, x_quantized, x_min, x_max, w, w_min, w_max, b, z_output); - - TENSOR z_qnt_output = ctx.add(new RamTensor()); - TENSOR z_min = ctx.add(new RamTensor({1})); - TENSOR z_max = ctx.add(new RamTensor({1})); - tensorQuantize(ctx, z_output, z_qnt_output, z_min, z_max); - - ctx.push(new ReluOp(), {z_qnt_output, z_min, z_max}, {relu_output, relu_min, relu_max}); + S_TENSOR w = ctx.add(t_import.ubyte_import( + "/fs/testData/deep_mlp/import-Variable_quint8_const_0.idx", "w")); + S_TENSOR w_min = + ctx.add(t_import.float_import("/fs/testData/deep_mlp/import-Variable_min_0.idx", "w_min")); + S_TENSOR w_max = + ctx.add(t_import.float_import("/fs/testData/deep_mlp/import-Variable_max_0.idx", "w_max")); + S_TENSOR b = + ctx.add(t_import.float_import("/fs/testData/deep_mlp/import-Variable_1_0.idx", "b")); + S_TENSOR relu_output = ctx.add(new RamTensor("relu_output")); + S_TENSOR relu_min = ctx.add(new RamTensor({1}, "relu_min")); + S_TENSOR relu_max = ctx.add(new RamTensor({1}, "relu_max")); + S_TENSOR z_output = ctx.add(new RamTensor("z_output")); + + ReluLayer(ctx, "x_quantized", "x_min", "x_max", "w", "w_min", "w_max", "b", "z_output"); + + S_TENSOR z_qnt_output = ctx.add(new RamTensor("z_qnt_output")); + S_TENSOR z_min = ctx.add(new RamTensor({1}, "z_min")); + S_TENSOR z_max = ctx.add(new RamTensor({1}, "z_max")); + tensorQuantize(ctx, "z_output", "z_qnt_output", "z_min", "z_max"); + + ctx.push(new ReluOp(), {"z_qnt_output", "z_min", "z_max"}, {"relu_output", "relu_min", "relu_max"}); ctx.eval(); //relu layer 2 - TENSOR w2 = ctx.add(t_import.ubyte_import( - "/fs/testData/deep_mlp/import-Variable_2_quint8_const_0.idx")); - TENSOR w_min2 = ctx.add(t_import.float_import( - "/fs/testData/deep_mlp/import-Variable_2_min_0.idx")); - TENSOR w_max2 = ctx.add(t_import.float_import( - "/fs/testData/deep_mlp/import-Variable_2_max_0.idx")); - TENSOR b2 = 
ctx.add(t_import.float_import("/fs/testData/deep_mlp/import-Variable_3_0.idx")); - TENSOR relu_output2 = ctx.add(new RamTensor()); - TENSOR relu_min2 = ctx.add(new RamTensor({1})); - TENSOR relu_max2 = ctx.add(new RamTensor({1})); - - S_TENSOR relus_output2 = relu_output2.lock(); - S_TENSOR relus_min2 = relu_min2.lock(); - S_TENSOR relus_max2 = relu_max2.lock(); - TENSOR z_output2 = ctx.add(new RamTensor()); - ReluLayer(ctx, relu_output, relu_min, relu_max, w2, w_min2, w_max2, b2, z_output2); - - - TENSOR z_qnt_output2 = ctx.add(new RamTensor()); - TENSOR z_min2 = ctx.add(new RamTensor({1})); - TENSOR z_max2 = ctx.add(new RamTensor({1})); - tensorQuantize(ctx, z_output2, z_qnt_output2, z_min2, z_max2); - - ctx.push(new ReluOp(), {z_qnt_output2, z_min2, z_max2}, {relu_output2, relu_min2, relu_max2}); + S_TENSOR w2 = ctx.add(t_import.ubyte_import( + "/fs/testData/deep_mlp/import-Variable_2_quint8_const_0.idx", "w2")); + S_TENSOR w_min2 = ctx.add(t_import.float_import( + "/fs/testData/deep_mlp/import-Variable_2_min_0.idx", "w_min2")); + S_TENSOR w_max2 = ctx.add(t_import.float_import( + "/fs/testData/deep_mlp/import-Variable_2_max_0.idx", "w_max2")); + S_TENSOR b2 = ctx.add(t_import.float_import("/fs/testData/deep_mlp/import-Variable_3_0.idx", "b2")); + S_TENSOR relu_output2 = ctx.add(new RamTensor("relu_output2")); + S_TENSOR relu_min2 = ctx.add(new RamTensor({1}, "relu_min2")); + S_TENSOR relu_max2 = ctx.add(new RamTensor({1}, "relu_max2")); + + S_TENSOR z_output2 = ctx.add(new RamTensor("z_output2")); + ReluLayer(ctx, "relu_output", "relu_min", "relu_max", "w2", "w_min2", "w_max2", "b2", "z_output2"); + + + S_TENSOR z_qnt_output2 = ctx.add(new RamTensor("z_qnt_output2")); + S_TENSOR z_min2 = ctx.add(new RamTensor({1}, "z_min2")); + S_TENSOR z_max2 = ctx.add(new RamTensor({1}, "z_max2")); + tensorQuantize(ctx, "z_output2", "z_qnt_output2", "z_min2", "z_max2"); + + ctx.push(new ReluOp(), {"z_qnt_output2", "z_min2", "z_max2"}, {"relu_output2", "relu_min2", 
"relu_max2"}); ctx.eval(); - TENSOR w3 = ctx.add(t_import.ubyte_import( + S_TENSOR w3 = ctx.add(t_import.ubyte_import( "/fs/testData/deep_mlp/runPredLayer/MatMul_2_eightbit_quantized_mat_mul/" - "inputs/Variable_4_quint8_const_0.idx")); - TENSOR w2_min = ctx.add(t_import.float_import( + "inputs/Variable_4_quint8_const_0.idx", "w3")); + S_TENSOR w2_min = ctx.add(t_import.float_import( "/fs/testData/deep_mlp/runPredLayer/MatMul_2_eightbit_quantized_mat_mul/" - "inputs/Variable_4_min_0.idx")); - TENSOR w2_max = ctx.add(t_import.float_import( + "inputs/Variable_4_min_0.idx", "w2_min")); + S_TENSOR w2_max = ctx.add(t_import.float_import( "/fs/testData/deep_mlp/runPredLayer/MatMul_2_eightbit_quantized_mat_mul/" - "inputs/Variable_4_max_0.idx")); - TENSOR bias2 = ctx.add(t_import.float_import( - "/fs/testData/deep_mlp/runPredLayer/add_2/inputs/Variable_5_0.idx")); - TENSOR dim = ctx.add(t_import.int_import( + "inputs/Variable_4_max_0.idx", "w2_max")); + S_TENSOR bias2 = ctx.add(t_import.float_import( + "/fs/testData/deep_mlp/runPredLayer/add_2/inputs/Variable_5_0.idx", "bias2")); + S_TENSOR dim = ctx.add(t_import.int_import( "/fs/testData/deep_mlp/runPredLayer/y_pred/inputs/" - "y_pred-dimension_0.idx")); + "y_pred-dimension_0.idx", "dim")); - TENSOR pred = ctx.add(new RamTensor()); - PredLayer(ctx, relu_output2, relu_min2, relu_max2, pred, w3, w2_min, w2_max, bias2, dim); - S_TENSOR pred_val = pred.lock(); + S_TENSOR pred = ctx.add(new RamTensor("pred")); + PredLayer(ctx, "relu_output2", "relu_min2", "relu_max2", "pred", "w3", "w2_min", "w2_max", "bias2", "dim"); ctx.eval(); Tensor* ref_out = t_import.float_import( - "/fs/testData/deep_mlp/runPredLayer/y_pred/outputs/y_pred_0.idx"); - Tensor* ref_pred = TensorCast(ref_out); + "/fs/testData/deep_mlp/runPredLayer/y_pred/outputs/y_pred_0.idx", "ref_out"); + Tensor* ref_pred = TensorCast(ref_out, "ref_pred"); - double result = Test::meanPercentErr(ref_pred, pred_val.get()); + double result = Test::meanPercentErr(ref_pred, 
pred.get()); if (result < 0.0001) { printf("PASSED %.8f\r\n\r\n", result); @@ -196,6 +186,6 @@ int runMLP(string inputIdxFile) { printf("FAILED %.8f\r\n\r\n", result); } - return *(pred.lock()->read(0, 0)); + return *(pred->read(0, 0)); // output layer } diff --git a/deep_mnist_mlp.hpp b/deep_mnist_mlp.hpp index dd649cd9..39966b06 100644 --- a/deep_mnist_mlp.hpp +++ b/deep_mnist_mlp.hpp @@ -12,15 +12,15 @@ #include "uTensorBase.hpp" #include "context.hpp" -void tensorQuantize(Context& ctx, TENSOR input, TENSOR output, - TENSOR out_min, TENSOR out_max); +void tensorQuantize(Context& ctx, TName input, TName output, + TName out_min, TName out_max); -void ReluLayer(Context& ctx, TENSOR x, TENSOR x_min, TENSOR x_max, - TENSOR w, TENSOR w_min, TENSOR w_max, TENSOR b, - TENSOR z_output); +void ReluLayer(Context& ctx, TName x, TName x_min, TName x_max, + TName w, TName w_min, TName w_max, TName b, + TName z_output); -void PredLayer(Context &ctx, TENSOR input, TENSOR input_min, - TENSOR input_max, TENSOR output, TENSOR w, TENSOR w_min, TENSOR w_max, TENSOR bias, TENSOR dim); +void PredLayer(Context &ctx, TName input, TName input_min, + TName input_max, TName output, TName w, TName w_min, TName w_max, TName bias, TName dim); int runMLP(string inputIdxFile); diff --git a/main.cpp b/main.cpp index fbc0a164..6d2ef839 100644 --- a/main.cpp +++ b/main.cpp @@ -7,13 +7,16 @@ #include "tensorIdxImporterTests.hpp" #include "context.hpp" #include "ArrayTests.hpp" -#include "context_test.hpp" #include "MathTests.hpp" +#include "MatrixTests.hpp" +#include "context_test.hpp" #include "tensor_test.hpp" #include "NnTests.hpp" -#include "MatrixTests.hpp" #include "mlp_test.hpp" #include "deep_mnist_mlp.hpp" +#include "context_test.hpp" +#include "MathTests.hpp" +#include "MatrixTests.hpp" Serial pc(USBTX, USBRX, 115200); SDBlockDevice bd(MBED_CONF_APP_SD_MOSI, MBED_CONF_APP_SD_MISO, @@ -28,61 +31,61 @@ int main(int argc, char** argv) { printf("running deep-mlp...\r\n"); int prediction = 
runMLP("/fs/testData/deep_mlp/import-Placeholder_0.idx"); - printf("prediction: %d\r\n", prediction); - idxImporterTest idxTest; - idxTest.runAll(); - - - printf("IDX import:\r\n"); - idxTest.printSummary(); - - ArrayOpsTest arrayTests; - arrayTests.runAll(); - printf("Array: \r\n"); - arrayTests.printSummary(); - - printf("Math: \r\n"); - MathOpsTest mathTests; - mathTests.runAll(); - printf("Math result...\r\n"); - mathTests.printSummary(); - - printf("running matrix test ...\r\n"); - matrixOpsTest matrixTests; - matrixTests.runAll(); - matrixTests.printSummary(); - - printf("Context test: \r\n"); - contextTest ctxTest; - ctxTest.runAll(); - printf("Context result...\r\n"); - ctxTest.printSummary(); + printf("prediction: %d\r\n\r\n\r\n\r\n", prediction); + + printf("IDX import:\r\n"); + idxImporterTest idxTest; + idxTest.runAll(); + printf("IDX import result...\r\n"); + idxTest.printSummary(); + + printf("tesnor test: \r\n"); + tensorTest tenT; + tenT.runAll(); + printf("tensor result...\r\n"); + tenT.printSummary(); + + printf("Context test: \r\n"); + contextTest ctxTest; + ctxTest.runAll(); + printf("Context result...\r\n"); + ctxTest.printSummary(); + + printf("Transformation test: \r\n"); + transTest tTest; + tTest.runAll(); + printf("Transformation result...\r\n"); + tTest.printSummary(); + + printf("Array test: \r\n"); + ArrayOpsTest arrayTests; + arrayTests.runAll(); + printf("Array result...\r\n"); + arrayTests.printSummary(); + + printf("Math: \r\n"); + MathOpsTest mathTests; + mathTests.runAll(); + printf("Math result...\r\n"); + mathTests.printSummary(); + + printf("running matrix test:\r\n"); + matrixOpsTest matrixTests; + matrixTests.runAll(); + printf("running matrix result ...\r\n"); + matrixTests.printSummary(); printf("NnOpS: \r\n"); - NnOpsTest nnTest; - nnTest.runAll(); - printf("Nn Ops result...\r\n"); - nnTest.printSummary(); - - printf("Transformation test: \r\n"); - transTest tTest; - tTest.runAll(); - printf("Transformation 
result...\r\n"); - tTest.printSummary(); - - printf("tesnor test: \r\n"); - tensorTest tenT; - tenT.runAll(); - printf("tensor result: \r\n"); - tenT.printSummary(); - - + NnOpsTest nnTest; + nnTest.runAll(); + printf("Nn Ops result...\r\n"); + nnTest.printSummary(); - printf("mlp test: \r\n"); +/* printf("mlp test: \r\n"); mlpTest mlpt; mlpt.runAll(); printf("mlp result...\r\n"); - mlpt.printSummary(); + mlpt.printSummary();*/ diff --git a/mlp_test.hpp b/mlp_test.hpp index be390b9b..4c1fe189 100644 --- a/mlp_test.hpp +++ b/mlp_test.hpp @@ -21,54 +21,47 @@ class mlpTest : public Test { //reshape //input - TENSOR mnist_input = ctx.add(t_import.float_import("/fs/testData/mlpTest/runQuantization/in/import-Placeholder_0.idx")); - TENSOR reshape_dim = ctx.add(t_import.int_import("/fs/testData/mlpTest/runQuantization/in/import-MatMul_eightbit_reshape_dims_0.idx")); + S_TENSOR mnist_input = ctx.add(t_import.float_import("/fs/testData/mlpTest/runQuantization/in/import-Placeholder_0.idx", "mnist_input")); + S_TENSOR reshape_dim = ctx.add(t_import.int_import("/fs/testData/mlpTest/runQuantization/in/import-MatMul_eightbit_reshape_dims_0.idx", "reshape_dim")); //output - TENSOR reshape_out = ctx.add(new RamTensor()); + S_TENSOR reshape_out = ctx.add(new RamTensor("reshape_out")); // S_TENSOR out_reshape_out = reshape_out.lock(); - ctx.push(new ReshapeOp(), {mnist_input, reshape_dim}, {reshape_out}); + ctx.push(new ReshapeOp(), {"mnist_input", "reshape_dim"}, {"reshape_out"}); //min //input - TENSOR min_reduce_dim = ctx.add(t_import.int_import("/fs/testData/mlpTest/runQuantization/in/import-MatMul_eightbit_reduction_dims_0_min.idx")); + S_TENSOR min_reduce_dim = ctx.add(t_import.int_import("/fs/testData/mlpTest/runQuantization/in/import-MatMul_eightbit_reduction_dims_0_min.idx", "min_reduce_dim")); //output - TENSOR min_out = ctx.add(new RamTensor({1})); + S_TENSOR min_out = ctx.add(new RamTensor({1}, "min_out")); // S_TENSOR out_min_out = min_out.lock(); - ctx.push(new 
MinOp(), {reshape_out, min_reduce_dim}, {min_out}); + ctx.push(new MinOp(), {"reshape_out", "min_reduce_dim"}, {"min_out"}); //max //input - TENSOR max_reduce_dim = ctx.add(t_import.int_import("/fs/testData/mlpTest/runQuantization/in/import-MatMul_eightbit_reduction_dims_0_max.idx")); + S_TENSOR max_reduce_dim = ctx.add(t_import.int_import("/fs/testData/mlpTest/runQuantization/in/import-MatMul_eightbit_reduction_dims_0_max.idx", "max_reduce_dim")); //output - TENSOR max_out = ctx.add(new RamTensor({1})); + S_TENSOR max_out = ctx.add(new RamTensor({1}, "max_out")); // S_TENSOR out_max_out = max_out.lock(); - ctx.push(new MaxOp(), {reshape_out, max_reduce_dim}, {max_out}); + ctx.push(new MaxOp(), {"reshape_out", "max_reduce_dim"}, {"max_out"}); //quantization //output - TENSOR qnt_out = ctx.add(new RamTensor()); - TENSOR qnt_min = ctx.add(new RamTensor({1})); - TENSOR qnt_max = ctx.add(new RamTensor({1})); - - S_TENSOR out_qnt = qnt_out.lock(); - S_TENSOR out_min = qnt_min.lock(); - S_TENSOR out_max = qnt_max.lock(); - - TENSOR qnt_ref = ctx.add(t_import.ubyte_import("/fs/testData/mlpTest/runQuantization/out/import-MatMul_eightbit_quantize_Placeholder_0.idx")); - TENSOR qnt_min_ref = ctx.add(t_import.float_import("/fs/testData/mlpTest/runQuantization/out/import-MatMul_eightbit_quantize_Placeholder_1.idx")); - TENSOR qnt_max_ref = ctx.add(t_import.float_import("/fs/testData/mlpTest/runQuantization/out/import-MatMul_eightbit_quantize_Placeholder_2.idx")); - S_TENSOR ref_qnt = qnt_ref.lock(); - S_TENSOR ref_max = qnt_max_ref.lock(); - S_TENSOR ref_min = qnt_min_ref.lock(); - - ctx.push(new QuantizeV2Op(), {reshape_out, min_out, max_out}, {qnt_out, qnt_min, qnt_max}); + S_TENSOR qnt_out = ctx.add(new RamTensor("qnt_out")); + S_TENSOR qnt_min = ctx.add(new RamTensor({1}, "qnt_min")); + S_TENSOR qnt_max = ctx.add(new RamTensor({1}, "qnt_max")); + + S_TENSOR qnt_ref = 
ctx.add(t_import.ubyte_import("/fs/testData/mlpTest/runQuantization/out/import-MatMul_eightbit_quantize_Placeholder_0.idx", "qnt_ref")); + S_TENSOR qnt_min_ref = ctx.add(t_import.float_import("/fs/testData/mlpTest/runQuantization/out/import-MatMul_eightbit_quantize_Placeholder_1.idx", "qnt_min_ref")); + S_TENSOR qnt_max_ref = ctx.add(t_import.float_import("/fs/testData/mlpTest/runQuantization/out/import-MatMul_eightbit_quantize_Placeholder_2.idx", "qnt_max_ref")); + + ctx.push(new QuantizeV2Op(), {"reshape_out", "min_out", "max_out"}, {"qnt_out", "qnt_min", "qnt_max"}); ctx.eval(); timer_stop(); - double result = meanPercentErr(ref_qnt.get(), out_qnt.get()); - result += meanPercentErr(ref_min.get(), out_min.get()); - result += meanPercentErr(ref_max.get(), out_max.get()); + double result = meanPercentErr(qnt_ref.get(), qnt_out.get()); + result += meanPercentErr(qnt_min_ref.get(), qnt_min.get()); + result += meanPercentErr(qnt_max_ref.get(), qnt_max.get()); passed(result == 0); } @@ -82,152 +75,120 @@ class mlpTest : public Test { //quantized matrix multiplication //input - TENSOR x = - ctx.add(t_import.ubyte_import("/fs/testData/mlpTest/runQntDeqntLayerZ/in/import-MatMul_eightbit_quantize_Placeholder_0.idx")); - TENSOR x_min = - ctx.add(t_import.float_import("/fs/testData/mlpTest/runQntDeqntLayerZ/in/import-MatMul_eightbit_quantize_Placeholder_1.idx")); - TENSOR x_max = - ctx.add(t_import.float_import("/fs/testData/mlpTest/runQntDeqntLayerZ/in/import-MatMul_eightbit_quantize_Placeholder_2.idx")); - TENSOR w = - ctx.add(t_import.ubyte_import("/fs/testData/mlpTest/runQntDeqntLayerZ/in/import-Variable_quint8_const_0.idx")); - TENSOR w_min = - ctx.add(t_import.float_import("/fs/testData/mlpTest/runQntDeqntLayerZ/in/import-Variable_min_0.idx")); - TENSOR w_max = - ctx.add(t_import.float_import("/fs/testData/mlpTest/runQntDeqntLayerZ/in/import-Variable_max_0.idx")); + S_TENSOR x = + 
ctx.add(t_import.ubyte_import("/fs/testData/mlpTest/runQntDeqntLayerZ/in/import-MatMul_eightbit_quantize_Placeholder_0.idx", "x")); + S_TENSOR x_min = + ctx.add(t_import.float_import("/fs/testData/mlpTest/runQntDeqntLayerZ/in/import-MatMul_eightbit_quantize_Placeholder_1.idx", "x_min")); + S_TENSOR x_max = + ctx.add(t_import.float_import("/fs/testData/mlpTest/runQntDeqntLayerZ/in/import-MatMul_eightbit_quantize_Placeholder_2.idx", "x_max")); + S_TENSOR w = + ctx.add(t_import.ubyte_import("/fs/testData/mlpTest/runQntDeqntLayerZ/in/import-Variable_quint8_const_0.idx", "w")); + S_TENSOR w_min = + ctx.add(t_import.float_import("/fs/testData/mlpTest/runQntDeqntLayerZ/in/import-Variable_min_0.idx", "w_min")); + S_TENSOR w_max = + ctx.add(t_import.float_import("/fs/testData/mlpTest/runQntDeqntLayerZ/in/import-Variable_max_0.idx", "w_max")); DEBUG("all QuantizedMatMul input imported...\r\n"); //output - uint32_t out_col = (x.lock()->getShape())[0]; - uint32_t out_row = (w.lock()->getShape())[1]; - TENSOR out_c = ctx.add(new RamTensor({out_col, out_row})); + uint32_t out_col = (x->getShape())[0]; + uint32_t out_row = (w->getShape())[1]; + S_TENSOR out_c = ctx.add(new RamTensor({out_col, out_row}, "out_c")); // printf("x[0] = %d, x[1] = %d, b[0] = %d, b[1] = %d\r\n", (x.getShape())[0], (x.getShape())[1], // (w.getShape())[0], (w.getShape())[1]); // printf("c[0] = %d, c[1] = %d\r\n", (out_c.getShape())[0], (out_c.getShape())[1]); // fflush(stdout); - TENSOR matmul_out_min = ctx.add(new RamTensor({1})); - TENSOR matmul_out_max = ctx.add(new RamTensor({1})); + S_TENSOR matmul_out_min = ctx.add(new RamTensor({1}, "matmul_out_min")); + S_TENSOR matmul_out_max = ctx.add(new RamTensor({1}, "matmul_out_max")); - TList inputs = {x, x_min, x_max, w, w_min, w_max}; - TList outputs = {out_c, matmul_out_min, matmul_out_max}; - S_TENSOR out_val = out_c.lock(); - S_TENSOR out_min = matmul_out_min.lock(); - S_TENSOR out_max = matmul_out_max.lock(); - ctx.push(new QntMatMulOp(), inputs, 
outputs); + ctx.push(new QntMatMulOp(), {"x", "x_min", "x_max", "w", "w_min", "w_max"}, {"out_c", "matmul_out_min", "matmul_out_max"}); //clean up - TENSOR ref_out_c = - ctx.add(t_import.int_import("/fs/testData/mlpTest/runQntDeqntLayerZ/import-MatMul_eightbit_quantized_mat_mul_0.idx")); - TENSOR ref_matmul_out_min = - ctx.add(t_import.float_import("/fs/testData/mlpTest/runQntDeqntLayerZ/import-MatMul_eightbit_quantized_mat_mul_1.idx")); - TENSOR ref_matmul_out_max = - ctx.add(t_import.float_import("/fs/testData/mlpTest/runQntDeqntLayerZ/import-MatMul_eightbit_quantized_mat_mul_2.idx")); - - S_TENSOR ref_val = ref_out_c.lock(); - S_TENSOR ref_min = ref_matmul_out_min.lock(); - S_TENSOR ref_max = ref_matmul_out_max.lock(); - /* double temp_result = (meanPercentErr(ref_val.get(), out_val.get()) + meanPercentErr(ref_min.get(), out_min.get()) + meanPercentErr(ref_max.get(), out_max.get())); - if(temp_result > 0) { - DEBUG("matrix mul failed\r\n"); - failed(); - return; - } else { - DEBUG("matrix mul passed\r\n"); - } -*/ + // double temp_result = (meanPercentErr(ref_val.get(), out_val.get()) + meanPercentErr(ref_min.get(), out_min.get()) + meanPercentErr(ref_max.get(), out_max.get())); + //if(temp_result > 0) { + // DEBUG("matrix mul failed\r\n"); + // failed(); + // return; + // } else { + // DEBUG("matrix mul passed\r\n"); + // } + DEBUG("QuantizedMatMul completed!\r\n"); //output - TENSOR req_out_min = ctx.add(new RamTensor({1})); - TENSOR req_out_max = ctx.add(new RamTensor({1})); - S_TENSOR out_req_min = req_out_min.lock(); - S_TENSOR out_req_max = req_out_max.lock(); - ctx.push(new Requantization_RangeOp(), {out_c, matmul_out_min, matmul_out_max}, {req_out_min, req_out_max}); - - TENSOR ref_req_out_min = - ctx.add(t_import.float_import("/fs/testData/mlpTest/runQntDeqntLayerZ/in/import-MatMul_eightbit_requant_range_0.idx")); - TENSOR ref_req_out_max = - 
ctx.add(t_import.float_import("/fs/testData/mlpTest/runQntDeqntLayerZ/in/import-MatMul_eightbit_requant_range_1.idx")); - S_TENSOR ref_req_min = ref_req_out_min.lock(); - S_TENSOR ref_req_max = ref_req_out_max.lock(); -/* - temp_result = (meanPercentErr(ref_req_min.get(), out_req_min.get()) + meanPercentErr(ref_req_max.get(), out_req_max.get())); - if(temp_result > 0) { - DEBUG("Requantization_Range failed\r\n"); - failed(); - return; - } else { - DEBUG("Requantization_Range passed\r\n"); - } - - DEBUG("Requantization_Range completed!\r\n");*/ + S_TENSOR req_out_min = ctx.add(new RamTensor({1}, "req_out_min")); + S_TENSOR req_out_max = ctx.add(new RamTensor({1}, "req_out_max")); + ctx.push(new Requantization_RangeOp(), {"out_c", "matmul_out_min", "matmul_out_max"}, {"req_out_min", "req_out_max"}); + + +// temp_result = (meanPercentErr(ref_req_min.get(), out_req_min.get()) + meanPercentErr(ref_req_max.get(), out_req_max.get())); +// if(temp_result > 0) { +// DEBUG("Requantization_Range failed\r\n"); +// failed(); +// return; +// } else { +// DEBUG("Requantization_Range passed\r\n"); +// } + +// DEBUG("Requantization_Range completed!\r\n"); //output - TENSOR reqnt_out = ctx.add(new RamTensor(out_c.lock()->getShape())); - TENSOR reqnt_out_min = ctx.add(new RamTensor({1})); - TENSOR reqnt_out_max = ctx.add(new RamTensor({1})); - S_TENSOR out_reqnt = reqnt_out.lock(); - S_TENSOR out_reqnt_min = reqnt_out_min.lock(); - S_TENSOR out_reqnt_max = reqnt_out_max.lock(); - ctx.push(new RequantizeOp(), {out_c, matmul_out_min, matmul_out_max, req_out_min, req_out_max}, {reqnt_out, reqnt_out_min, reqnt_out_max}); + S_TENSOR reqnt_out = ctx.add(new RamTensor(out_c->getShape(), "reqnt_out")); + S_TENSOR reqnt_out_min = ctx.add(new RamTensor({1}, "reqnt_out_min")); + S_TENSOR reqnt_out_max = ctx.add(new RamTensor({1}, "reqnt_out_max")); + ctx.push(new RequantizeOp(), {"out_c", "matmul_out_min", "matmul_out_max", "req_out_min", "req_out_max"}, {"reqnt_out", "reqnt_out_min", 
"reqnt_out_max"}); //clean up - TENSOR ref_reqnt_out = - ctx.add(t_import.ubyte_import("/fs/testData/mlpTest/runQntDeqntLayerZ/import-MatMul_eightbit_requantize_0.idx")); - TENSOR ref_reqnt_out_min = - ctx.add(t_import.float_import("/fs/testData/mlpTest/runQntDeqntLayerZ/import-MatMul_eightbit_requantize_1.idx")); - TENSOR ref_reqnt_out_max = - ctx.add(t_import.float_import("/fs/testData/mlpTest/runQntDeqntLayerZ/import-MatMul_eightbit_requantize_2.idx")); - S_TENSOR ref_reqnt = ref_reqnt_out.lock(); - S_TENSOR ref_reqnt_min = ref_reqnt_out_min.lock(); - S_TENSOR ref_reqnt_max = ref_reqnt_out_max.lock(); -/* - temp_result = (meanPercentErr(ref_reqnt.get(), out_reqnt.get()) + meanPercentErr(ref_reqnt_min.get(), out_reqnt_min.get()) + meanPercentErr(ref_reqnt_max.get(), out_reqnt_max.get())); - if(temp_result > 0) { - DEBUG("Requantize failed\r\n"); - failed(); - return; - } else { - DEBUG("Requantize passed\r\n"); - } - - DEBUG("Requantize completed!\r\n");*/ + S_TENSOR ref_reqnt_out = + ctx.add(t_import.ubyte_import("/fs/testData/mlpTest/runQntDeqntLayerZ/import-MatMul_eightbit_requantize_0.idx", "ref_reqnt_out")); + S_TENSOR ref_reqnt_out_min = + ctx.add(t_import.float_import("/fs/testData/mlpTest/runQntDeqntLayerZ/import-MatMul_eightbit_requantize_1.idx", "ref_reqnt_out_min")); + S_TENSOR ref_reqnt_out_max = + ctx.add(t_import.float_import("/fs/testData/mlpTest/runQntDeqntLayerZ/import-MatMul_eightbit_requantize_2.idx", "ref_reqnt_out_max")); + +// temp_result = (meanPercentErr(ref_reqnt.get(), out_reqnt.get()) + meanPercentErr(ref_reqnt_min.get(), out_reqnt_min.get()) + meanPercentErr(ref_reqnt_max.get(), out_reqnt_max.get())); +// if(temp_result > 0) { +// DEBUG("Requantize failed\r\n"); +// failed(); +// return; +// } else { +// DEBUG("Requantize passed\r\n"); +// } + + DEBUG("Requantize completed!\r\n"); //output - TENSOR deqnt_out = ctx.add(new RamTensor(out_c.lock()->getShape())); - S_TENSOR out_deqnt = deqnt_out.lock(); - ctx.push(new DequantizeOp(), 
{reqnt_out, reqnt_out_min, reqnt_out_max}, {deqnt_out}); - - TENSOR ref_deqnt_out = ctx.add(t_import.float_import("/fs/testData/mlpTest/runQntDeqntLayerZ/import-MatMul_0.idx")); - S_TENSOR ref_deqnt = ref_deqnt_out.lock(); - /*double temp = meanPercentErr(ref_deqnt.get(), out_deqnt.get()); - if(temp > 0.0001) { - printf("dequantize failed (%.6f)\r\n", temp); - const float* ref_ptr = ref_deqnt.get()->read(0, 0); - const float* test_ptr = out_deqnt.get()->read(0, 0); - for(uint32_t i; i < ref_deqnt->getSize(); i++) { - if(ref_ptr[i] != test_ptr[i]) { - DEBUG("%d: %.3f != %.3f, diff: %.8f%%\r\n", i, ref_ptr[i], test_ptr[i], test_ptr[i]/ref_ptr[i]); - } else { - DEBUG("%d: %.3f == %.3f\r\n", i, ref_ptr[i], test_ptr[i]); - } - } - failed(); - return; - } else { - DEBUG("dequantize passed\r\n"); - }*/ + S_TENSOR deqnt_out = ctx.add(new RamTensor(out_c->getShape(), "deqnt_out")); + ctx.push(new DequantizeOp(), {"reqnt_out", "reqnt_out_min", "reqnt_out_max"}, {"deqnt_out"}); + + S_TENSOR ref_deqnt_out = ctx.add(t_import.float_import("/fs/testData/mlpTest/runQntDeqntLayerZ/import-MatMul_0.idx", "ref_deqnt_out")); + //double temp = meanPercentErr(ref_deqnt.get(), out_deqnt.get()); + //if(temp > 0.0001) { + // printf("dequantize failed (%.6f)\r\n", temp); + // const float* ref_ptr = ref_deqnt.get()->read(0, 0); + // const float* test_ptr = out_deqnt.get()->read(0, 0); + // for(uint32_t i; i < ref_deqnt->getSize(); i++) { + // if(ref_ptr[i] != test_ptr[i]) { + // DEBUG("%d: %.3f != %.3f, diff: %.8f%%\r\n", i, ref_ptr[i], test_ptr[i], test_ptr[i]/ref_ptr[i]); + // } else { + // DEBUG("%d: %.3f == %.3f\r\n", i, ref_ptr[i], test_ptr[i]); + // } + // } + // failed(); + // return; + // } else { + // DEBUG("dequantize passed\r\n"); + // } DEBUG("dequantize completed!\r\n"); //input - TENSOR bias = ctx.add(t_import.float_import("/fs/testData/mlpTest/runQntDeqntLayerZ/out/import-Variable_1_0.idx")); + S_TENSOR bias = 
ctx.add(t_import.float_import("/fs/testData/mlpTest/runQntDeqntLayerZ/out/import-Variable_1_0.idx", "bias")); //output - TENSOR output_z = ctx.add(new RamTensor(deqnt_out.lock()->getShape())); - S_TENSOR out_z = output_z.lock(); - ctx.push(new AddOp(), {deqnt_out, bias}, {output_z}); + S_TENSOR output_z = ctx.add(new RamTensor(deqnt_out->getShape(), "output_z")); + ctx.push(new AddOp(), {"deqnt_out", "bias"}, {"output_z"}); ctx.eval(); DEBUG("Add completed!\r\n"); @@ -235,10 +196,9 @@ class mlpTest : public Test { timer_stop(); //load reference - TENSOR ref_z = ctx.add(t_import.float_import("/fs/testData/mlpTest/runQntDeqntLayerZ/out/import-add_0.idx")); - S_TENSOR ref_z_v = ref_z.lock(); + S_TENSOR ref_z = ctx.add(t_import.float_import("/fs/testData/mlpTest/runQntDeqntLayerZ/out/import-add_0.idx", "ref_z")); - double result = meanPercentErr(ref_z_v.get(), out_z.get()); + double result = meanPercentErr(ref_z.get(), output_z.get()); passed(result < 0.0001); @@ -248,59 +208,53 @@ class mlpTest : public Test { testStart("runQntRelu"); - TENSOR input_z = ctx.add(t_import.float_import("/fs/testData/mlpTest/runQntRelu/in/import-add_0.idx")); - TENSOR reshape_dim = ctx.add(t_import.int_import("/fs/testData/mlpTest/runQntRelu/in/import-Relu_eightbit_reshape_dims_0.idx")); - TENSOR reshape_out = ctx.add(new RamTensor()); + S_TENSOR input_z = ctx.add(t_import.float_import("/fs/testData/mlpTest/runQntRelu/in/import-add_0.idx", "input_z1")); + S_TENSOR reshape_dim = ctx.add(t_import.int_import("/fs/testData/mlpTest/runQntRelu/in/import-Relu_eightbit_reshape_dims_0.idx", "reshape_dim1")); + S_TENSOR reshape_out = ctx.add(new RamTensor("reshape_out1")); timer_start(); - ctx.push(new ReshapeOp(), {input_z, reshape_dim}, {reshape_out}); + ctx.push(new ReshapeOp(), {"input_z1", "reshape_dim1"}, {"reshape_out1"}); //min //input - TENSOR min_reduce_dim = ctx.add(t_import.int_import("/fs/testData/mlpTest/runQntRelu/in/import-Relu_eightbit_reduction_dims_0_min.idx")); + S_TENSOR 
min_reduce_dim = ctx.add(t_import.int_import("/fs/testData/mlpTest/runQntRelu/in/import-Relu_eightbit_reduction_dims_0_min.idx", "min_reduce_dim1")); //output - TENSOR min_out = ctx.add(new RamTensor({1})); - ctx.push(new MinOp(), {reshape_out, min_reduce_dim}, {min_out}); + S_TENSOR min_out = ctx.add(new RamTensor({1}, "min_out1")); + ctx.push(new MinOp(), {"reshape_out1", "min_reduce_dim1"}, {"min_out1"}); //max //input - TENSOR max_reduce_dim = ctx.add(t_import.int_import("/fs/testData/mlpTest/runQntRelu/in/import-Relu_eightbit_reduction_dims_0_max.idx")); + S_TENSOR max_reduce_dim = ctx.add(t_import.int_import("/fs/testData/mlpTest/runQntRelu/in/import-Relu_eightbit_reduction_dims_0_max.idx", "max_reduce_dim1")); //output - TENSOR max_out = ctx.add(new RamTensor({1})); - ctx.push(new MaxOp(), {reshape_out, max_reduce_dim}, {max_out}); + S_TENSOR max_out = ctx.add(new RamTensor({1}, "max_out1")); + ctx.push(new MaxOp(), {"reshape_out1", "max_reduce_dim1"}, {"max_out1"}); //quantization //output - TENSOR qnt_out = ctx.add(new RamTensor()); - TENSOR qnt_min = ctx.add(new RamTensor({1})); - TENSOR qnt_max = ctx.add(new RamTensor({1})); - ctx.push(new QuantizeV2Op(), {reshape_out, min_out, max_out}, {qnt_out, qnt_min, qnt_max}); + S_TENSOR qnt_out = ctx.add(new RamTensor("qnt_out1")); + S_TENSOR qnt_min = ctx.add(new RamTensor({1}, "qnt_min1")); + S_TENSOR qnt_max = ctx.add(new RamTensor({1}, "qnt_max1")); + ctx.push(new QuantizeV2Op(), {"reshape_out1", "min_out1", "max_out1"}, {"qnt_out1", "qnt_min1", "qnt_max1"}); - TENSOR out = ctx.add(new RamTensor()); - TENSOR out_min = ctx.add(new RamTensor({1})); - TENSOR out_max = ctx.add(new RamTensor({1})); - - S_TENSOR out_val = out.lock(); - S_TENSOR out_min_val = out_min.lock(); - S_TENSOR out_max_val = out_max.lock(); - ctx.push(new ReluOp(), {qnt_out, qnt_min, qnt_max}, {out, out_min, out_max}); + S_TENSOR out = ctx.add(new RamTensor("out1")); + S_TENSOR out_min = ctx.add(new RamTensor({1}, "out_min1")); + S_TENSOR 
out_max = ctx.add(new RamTensor({1}, "out_max1")); + + ctx.push(new ReluOp(), {"qnt_out1", "qnt_min1", "qnt_max1"}, {"out1", "out_min1", "out_max1"}); ctx.eval(); timer_stop(); - TENSOR ref_out = - ctx.add(t_import.ubyte_import("/fs/testData/mlpTest/runQntRelu/out/import-Relu_eightbit_quantized_0.idx")); - TENSOR ref_out_min = ctx.add(t_import.float_import("/fs/testData/mlpTest/runQntRelu/out/import-Relu_eightbit_quantized_1.idx")); - TENSOR ref_out_max = - ctx.add(t_import.float_import("/fs/testData/mlpTest/runQntRelu/out/import-Relu_eightbit_quantized_2.idx")); - - S_TENSOR ref_val = ref_out.lock(); - S_TENSOR ref_min_val = ref_out_min.lock(); - S_TENSOR ref_max_val = ref_out_max.lock(); - double result = meanPercentErr(ref_val.get(), out_val.get()); - result += meanPercentErr(ref_min_val.get(), out_min_val.get()); - result += meanPercentErr(ref_max_val.get(), out_max_val.get()); + S_TENSOR ref_out = + ctx.add(t_import.ubyte_import("/fs/testData/mlpTest/runQntRelu/out/import-Relu_eightbit_quantized_0.idx", "ref_out1")); + S_TENSOR ref_out_min = ctx.add(t_import.float_import("/fs/testData/mlpTest/runQntRelu/out/import-Relu_eightbit_quantized_1.idx", "ref_out_min1")); + S_TENSOR ref_out_max = + ctx.add(t_import.float_import("/fs/testData/mlpTest/runQntRelu/out/import-Relu_eightbit_quantized_2.idx", "ref_out_max1")); + + double result = meanPercentErr(ref_out.get(), out.get()); + result += meanPercentErr(ref_out_min.get(), out_min.get()); + result += meanPercentErr(ref_out_max.get(), out_max.get()); passed(result == 0); diff --git a/tensor.hpp b/tensor.hpp index 91d99741..76ee6040 100644 --- a/tensor.hpp +++ b/tensor.hpp @@ -18,18 +18,23 @@ // }; class Tensor; - +typedef std::string TName; +typedef std::string OpName; +typedef std::vector TNameList; typedef std::shared_ptr S_TENSOR; -typedef std::weak_ptr TENSOR; -typedef std::vector TList; typedef std::vector S_TList; class uTensor { public: virtual void inFocus(){}; virtual void deFocus(){}; + virtual std::string 
getName() { return name; } + virtual void setName(std::string _name) { name = _name; } + virtual ~uTensor() = 0; +private: + std::string name; }; @@ -57,10 +62,11 @@ class Tensor : public uTensor { protected: std::shared_ptr s; // short for states public: - Tensor(void) { + Tensor(TName &_name) { s = std::make_shared(); s->total_size = 0; s->data = nullptr; + setName(_name); } // returns how far a given dimension is apart @@ -149,9 +155,9 @@ template class RamTensor : public Tensor { // need deep copy public: - RamTensor() : Tensor() {} + RamTensor(TName _name) : Tensor(_name) {} - RamTensor(std::initializer_list l) : Tensor() { + RamTensor(std::initializer_list l, TName _name) : Tensor(_name) { std::vector v; for (auto i : l) { v.push_back(i); @@ -160,7 +166,7 @@ class RamTensor : public Tensor { Tensor::init(v); } - RamTensor(std::vector v) : Tensor() { + RamTensor(std::vector v, TName _name) : Tensor(_name) { Tensor::init(v); } @@ -212,8 +218,8 @@ class RamTensor : public Tensor { }; template -Tensor* TensorCast(Tensor* input) { - Tensor* output = new RamTensor(input->getShape()); +Tensor* TensorCast(Tensor* input, TName name) { + Tensor* output = new RamTensor(input->getShape(), name); const Tin* inputPrt = input->read(0, 0); Tout* outputPrt = output->write(0, 0); @@ -225,8 +231,8 @@ Tensor* TensorCast(Tensor* input) { } template -Tensor* TensorConstant(std::vector shape, T c) { - Tensor* output = new RamTensor(shape); +Tensor* TensorConstant(std::vector shape, T c, TName const &name) { + Tensor* output = new RamTensor(shape, name); T* outPrt = output->write(0, 0); for (uint32_t i = 0; i < output->getSize(); i++) { @@ -237,13 +243,13 @@ Tensor* TensorConstant(std::vector shape, T c) { } template -Tensor* TensorConstant(std::initializer_list l, T c) { +Tensor* TensorConstant(std::initializer_list l, T c, TName const &name) { std::vector v; for (auto i : l) { v.push_back(i); } - return TensorConstant(v, c); + return TensorConstant(v, c, name); } // diff --git 
a/tensorIdxImporter.hpp b/tensorIdxImporter.hpp index 3fcaf5cd..a5f61ab7 100644 --- a/tensorIdxImporter.hpp +++ b/tensorIdxImporter.hpp @@ -33,25 +33,25 @@ class TensorIdxImporter { HeaderMeta header; HeaderMeta parseHeader(void); template - Tensor* loader(string& filename, IDX_DTYPE idx_type); + Tensor* loader(string& filename, IDX_DTYPE idx_type, string name); void open(string filename); // void open(FILE *fp); public: - Tensor* ubyte_import(string filename) { - return loader(filename, IDX_DTYPE::idx_ubyte); + Tensor* ubyte_import(string filename, string name) { + return loader(filename, IDX_DTYPE::idx_ubyte, name); } - Tensor* byte_import(string filename) { - return loader(filename, IDX_DTYPE::idx_byte); + Tensor* byte_import(string filename, string name) { + return loader(filename, IDX_DTYPE::idx_byte, name); } - Tensor* short_import(string filename) { - return loader(filename, IDX_DTYPE::idx_short); + Tensor* short_import(string filename, string name) { + return loader(filename, IDX_DTYPE::idx_short, name); } - Tensor* int_import(string filename) { - return loader(filename, IDX_DTYPE::idx_int); + Tensor* int_import(string filename, string name) { + return loader(filename, IDX_DTYPE::idx_int, name); } - Tensor* float_import(string filename) { - return loader(filename, IDX_DTYPE::idx_float); + Tensor* float_import(string filename, string name) { + return loader(filename, IDX_DTYPE::idx_float, name); } uint32_t getMagicNumber(unsigned char dtype, unsigned char dim); uint8_t getIdxDTypeSize(IDX_DTYPE dtype); @@ -65,7 +65,7 @@ class TensorIdxImporter { template -Tensor* TensorIdxImporter::loader(string& filename, IDX_DTYPE idx_type) { +Tensor* TensorIdxImporter::loader(string& filename, IDX_DTYPE idx_type, string name) { fp = fopen(filename.c_str(), "r"); DEBUG("Opening file %s ", filename.c_str()); @@ -79,7 +79,7 @@ Tensor* TensorIdxImporter::loader(string& filename, IDX_DTYPE idx_type) { fseek(fp, header.dataPos, SEEK_SET); // need error handling - Tensor* t = 
new RamTensor(header.dim); // tensor allocated + Tensor* t = new RamTensor(header.dim, name); // tensor allocated const uint8_t unit_size = t->unit_size(); U* val = (U*)malloc(unit_size); diff --git a/tensorIdxImporterTests.hpp b/tensorIdxImporterTests.hpp index cf052073..833eb811 100644 --- a/tensorIdxImporterTests.hpp +++ b/tensorIdxImporterTests.hpp @@ -20,7 +20,7 @@ class idxImporterTest : public Test { TensorIdxImporter t_import; timer_start(); Tensor* t = - t_import.ubyte_import("/fs/testData/idxImport/uint8_4d_power2.idx"); + t_import.ubyte_import("/fs/testData/idxImport/uint8_4d_power2.idx", "uchar1"); timer_stop(); double result = sum(t); passed(result == 4518); @@ -32,7 +32,7 @@ class idxImporterTest : public Test { TensorIdxImporter t_import; timer_start(); Tensor* t = - t_import.short_import("/fs/testData/idxImport/int16_4d_power2.idx"); + t_import.short_import("/fs/testData/idxImport/int16_4d_power2.idx", "short1"); timer_stop(); double result = sum(t); passed(result == 270250); @@ -44,7 +44,7 @@ class idxImporterTest : public Test { TensorIdxImporter t_import; timer_start(); Tensor* t = - t_import.int_import("/fs/testData/idxImport/int32_4d_power2.idx"); + t_import.int_import("/fs/testData/idxImport/int32_4d_power2.idx", "int1"); timer_stop(); double result = sum(t); passed(result == 5748992600); @@ -56,7 +56,7 @@ class idxImporterTest : public Test { TensorIdxImporter t_import; timer_start(); Tensor* t = - t_import.float_import("/fs/testData/idxImport/float_4d_power2.idx"); + t_import.float_import("/fs/testData/idxImport/float_4d_power2.idx", "float1"); timer_stop(); double result = sum(t); diff --git a/tensor_test.hpp b/tensor_test.hpp index d5a07a72..8cab02b6 100644 --- a/tensor_test.hpp +++ b/tensor_test.hpp @@ -12,7 +12,7 @@ class tensorTest : public Test { public: void runResize() { testStart("tensortest"); - Tensor* a = new RamTensor({3, 2, 3}); + Tensor* a = new RamTensor({3, 2, 3}, "a"); std::vector v({1, 5, 8}); a->resize(v); bool res = 
testsize(1 * 5 * 8, a->getSize()); @@ -32,18 +32,20 @@ class transTest : public Test { bool res = false; testStart("transtest"); - for (int i = 0; i < 10; i++) { + for (int i = 0; i < 9; i++) { std::default_random_engine gen; vector tmp({2, 3, 4, 5}); - TENSOR inputTensor = ctx.add(new RamTensor(tmp)); + std::string a_s = "input" + std::to_string(i); + S_TENSOR inputTensor = ctx.add(new RamTensor(tmp, a_s)); vector permute = {2, 3, 1, 0}; - vector g = inputTensor.lock()->getShape(); + vector g = inputTensor->getShape(); std::shuffle(permute.begin(), permute.end(), gen); - permuteIndexTransform trans(inputTensor.lock()->getShape(), permute); + permuteIndexTransform trans(inputTensor->getShape(), permute); - TENSOR output = ctx.add(new RamTensor(trans.getNewShape())); - vector s = output.lock()->getShape(); + std::string a_o = "output" + std::to_string(i); + S_TENSOR output = ctx.add(new RamTensor(trans.getNewShape(), a_o)); + vector s = output->getShape(); res = testshape(g, s, permute); if (!res) { break; @@ -59,10 +61,10 @@ class transTest : public Test { vector output_1({2, 2, 3, 5, 6, 6, 4, 5, 7, 5, 1, 9, 1, 3, 2, 2, 5, 3, 3, 6, 3, 4, 9, 2}); - TENSOR inputTensor = ctx.add(new RamTensor({2, 3, 4})); + S_TENSOR inputTensor2 = ctx.add(new RamTensor({2, 3, 4}, "inputTensor2")); vector permute = {0, 2, 1}; - permuteIndexTransform trans(inputTensor.lock()->getShape(), permute); + permuteIndexTransform trans(inputTensor2->getShape(), permute); size_t out_index = 0; bool res = false; @@ -85,9 +87,9 @@ class transTest : public Test { vector output_2({2, 1, 2, 3, 3, 2, 5, 2, 6, 5, 6, 3, 4, 3, 5, 6, 7, 3, 5, 4, 1, 9, 9, 2}); - TENSOR inputTensor2 = ctx.add(new RamTensor({2, 4, 3})); + S_TENSOR inputTensor3 = ctx.add(new RamTensor({2, 4, 3}, "inputTensor3")); vector permute2 = {1, 2, 0}; - permuteIndexTransform trans2(inputTensor2.lock()->getShape(), permute2); + permuteIndexTransform trans2(inputTensor3->getShape(), permute2); testStart("test vec 2 for transform"); for 
(uint32_t i = 0; i < input_2.size(); i++) { out_index = trans2[i]; @@ -105,9 +107,9 @@ class transTest : public Test { vector output_3({8, 2, 8, 1, 0, 3, 4, 6, 2, 6, 0, 6, 3, 9, 2, 7, 0, 7, 0, 4, 8, 9, 0, 4, 3, 6, 8}); - TENSOR inputTensor3 = ctx.add(new RamTensor({1, 3, 3, 3})); + S_TENSOR inputTensor4 = ctx.add(new RamTensor({1, 3, 3, 3}, "inputTensor4")); vector permute3 = {0, 3, 2, 1}; - permuteIndexTransform trans3(inputTensor3.lock()->getShape(), permute3); + permuteIndexTransform trans3(inputTensor4->getShape(), permute3); testStart("test vec 4d for transform"); for (uint32_t i = 0; i < input_3.size(); i++) { out_index = trans3[i]; diff --git a/uTensorBase.cpp b/uTensorBase.cpp index 5f1e3b6e..57b0ec91 100644 --- a/uTensorBase.cpp +++ b/uTensorBase.cpp @@ -1,17 +1,14 @@ #include "uTensorBase.hpp" -void Operator::setInputs(TList &_inputs) { +void Operator::setInputs(S_TList &_inputs) { if(_inputs.size() != n_inputs) ERR_EXIT("Input Tensor list mismatched..."); - for(uint8_t i=0; i < _inputs.size(); i++) { - inputs.push_back(_inputs[i].lock()); - } + inputs = _inputs; } -void Operator::setOutputs(TList &_outputs) { +void Operator::setOutputs(S_TList &_outputs) { if(_outputs.size() != n_outputs) ERR_EXIT("Input Tensor list mismatched..."); - for(uint8_t i=0; i < _outputs.size(); i++) { - outputs.push_back(_outputs[i].lock()); - } + outputs = _outputs; + } diff --git a/uTensorBase.hpp b/uTensorBase.hpp index 8616aad4..7886705d 100644 --- a/uTensorBase.hpp +++ b/uTensorBase.hpp @@ -18,8 +18,8 @@ class Operator : public uTensor { public: virtual void compute() = 0; - void setInputs(TList &_inputs); - void setOutputs(TList &_outputs); + void setInputs(S_TList &_inputs); + void setOutputs(S_TList &_outputs); S_TList getInputs(void) { return inputs; } S_TList getOutputs(void) { return outputs;} uint8_t getNumInputs(void) { return n_inputs; }