machine-learning - CNTK からの予期しない結果

Question

CNTK のかなり単純なテストを実行していますが、意味のある結果が得られません。トレーニング/テストデータは、1 つの特徴と 1 つのラベルで構成されています。特徴は 10 進数の値で、ラベルは 0 ～ 5 の整数です。ほとんどの場合、ラベルの値は 0 または 1 で、値が大きくなるほどまれになります。5 が現れるのは 30,000 件中およそ 16 件です。

奇妙なのは、結果を出力すると、どのラベルもほぼ同じ確率で発生するかのように示されることです。私としては、0 または 1 の可能性が最も高く、5 の可能性は非常に低くなると予想しています。ここで何が間違っているのか、SO の皆さんに手がかりをいただければと思います。以下に、サンプルデータ、サンプル出力、および構成ファイルを示します。

構成:

# Top-level settings shared by every command section in this file.
# Parameters can be overwritten on the command line
# for example: cntk configFile=myConfigFile RootDir=../..
# For running from Visual Studio add
# currentDirectory=$(SolutionDir)/<path to corresponding data folder>
RootDir = ".."

# Every other directory is derived from RootDir via $...$ substitution.
ConfigDir = "$RootDir$/Config"
DataDir = "$RootDir$/Data"
OutputDir = "$RootDir$/Output"
ModelDir = "$OutputDir$/Models"

# deviceId=-1 for CPU, >=0 for GPU devices, "auto" chooses the best GPU, or CPU if no usable GPU is available
deviceId = 0

# Colon-separated list of command sections to run.
# NOTE(review): Simple_Demo_Train is listed twice, while Simple_Demo_Test and
# Simple_Demo_Output are defined in this file but never listed here - confirm
# this is the command line that actually produced the output being discussed.
command = Simple_Demo_Train:Simple_Demo_Train

precision = "float"
traceLevel = 1
# Model file location, built from ModelDir above.
modelPath = "$ModelDir$/simple.dnn"
# NOTE(review): presumably the node whose values the "write" action dumps. The
# name suggests scaled log-likelihoods rather than posterior probabilities -
# confirm before reading the written values as class probabilities.
outputNodeNames = ScaledLogLikelihood

#######################################
#  TRAINING CONFIG                    #
#######################################

# Training command: builds a simple feed-forward network and trains it with SGD
# on $DataDir$/train.txt.
Simple_Demo_Train = [
    action = "train"

    # Notation xxx:yyy*n:zzz is equivalent to xxx, then yyy repeated n times, then zzz
    # Example: 10:20*3:5 is equivalent to 10:20:20:20:5
    SimpleNetworkBuilder = [
        # 1 input, three 50-element hidden layers, 6 outputs (1:50*3:6 expands to 1:50:50:50:6)
        layerSizes = 1:50*3:6
        trainingCriterion = "CrossEntropyWithSoftmax"
        evalCriterion = "ErrorPrediction"
        layerTypes = "Sigmoid"
        initValueScale = 1.0
        applyMeanVarNorm = true
        uniformInit = true
        # NOTE(review): presumably this is what makes the network compute a label
        # prior (and hence the ScaledLogLikelihood output node) - confirm.
        needPrior = true
    ]

    SGD = [
        # epochSize = 0 means epochSize is the size of the training set
        epochSize = 0
        minibatchSize = 25
        # Same xxx:yyy*n:zzz notation as above: 0.5 once, then 0.2 twenty times, then 0.1
        learningRatesPerMB = 0.5:0.2*20:0.1
        momentumPerMB = 0.9
        dropoutRate = 0.0
        maxEpochs = 10000
    ]

    # Parameter values for the reader
    reader = [
        readerType = "UCIFastReader"
        file = "$DataDir$/train.txt"
        miniBatchMode = "partial"
        randomize = "none"
        verbosity = 1

        features = [
            dim = 1        # one-dimensional input data (a single feature value)
            start = 0      # Start with first element on line
        ]

        labels = [
            start = 1      # Label is the second element on the line (skip one element)
            dim = 1        # One label dimension
            labelDim = 5   # Five label values declared. NOTE(review): the data uses labels 0-5 (six values) and layerSizes ends in 6 - confirm this should not be 6
            labelMappingFile = "$DataDir$/mapping.txt"
        ]
    ]
]

########################################
#  TEST RESULTS                        #
#  (computes prediction error and      #
#   perplexity on a test set and       #
#   writes the output to the console.) #
########################################

# Test command: runs the "test" action over $DataDir$/test.txt using the same
# reader layout as the training section.
Simple_Demo_Test = [
    action = "test"

    # Parameter values for the reader
    reader = [
        readerType = "UCIFastReader"
        file = "$DataDir$/test.txt"
        miniBatchMode = "partial"
        randomize = "none"
        verbosity = 1

        features = [
            dim = 1        # one-dimensional input data (a single feature value)
            start = 0      # Start with first element on line
        ]

        labels = [
            start = 1      # Label is the second element on the line (skip one element)
            dim = 1        # One label dimension
            labelDim = 5   # Five label values declared. NOTE(review): the data uses labels 0-5 (six values) - confirm this should not be 6
            labelMappingFile = "$DataDir$/mapping.txt"
        ]
    ]
]

########################################
#  OUTPUT RESULTS                      #
#  (Computes the labels for a test set #
#   and writes the results to a file.) #
########################################

# Output command: runs the "write" action over $DataDir$/test.txt and dumps the
# result under outputPath, using the same reader layout as the other sections.
Simple_Demo_Output=[
    action = "write"

    # Parameter values for the reader
    reader = [
        readerType = "UCIFastReader"
        file = "$DataDir$/test.txt"
        miniBatchMode = "partial"
        randomize = "none"
        verbosity = 1

        features = [
            dim = 1        # one-dimensional input data (a single feature value)
            start = 0      # Start with first element on line
        ]

        labels = [
            start = 1      # Label is the second element on the line (skip one element)
            dim = 1        # One label dimension
            labelDim = 5   # Five label values declared. NOTE(review): the data uses labels 0-5 (six values) - confirm this should not be 6
            labelMappingFile = "$DataDir$/mapping.txt"
        ]
    ]

    outputPath = "$OutputDir$/SimpleOutput"    # Dump output as text
]

サンプルトレーニングデータ:

0.86 2
0.84 0
6.818182 0
1.34 1
1 1
0.92 0
0.7692308 0
0.755102 1
0.86 2
5.466667 0
0.96 0
0.9459459 1
1 4
1 0
0.8421053 2
5.5 0
0.84 2
1.2 2
1.32 1
0.98 0
1 1
1.2 2
5.4 1
1.06 2
0.98 1
1.041667 3
0.82 2
7.333333 0

サンプル出力:

3.18673 3.18266 3.19894 3.18264 3.2388 3.235 
3.18683 3.18272 3.19895 3.18264 3.23872 3.23491 
3.18668 3.18263 3.19894 3.18263 3.23884 3.23505 
3.18653 3.18255 3.19893 3.18263 3.23895 3.23518 
6.53459 4.97457 3.46288 3.3192 0.668835 0.204602 
3.18667 3.18263 3.19894 3.18263 3.23884 3.23505 
3.18657 3.18258 3.19893 3.18263 3.23892 3.23515 
3.18655 3.18257 3.19893 3.18263 3.23894 3.23516 
3.18665 3.18262 3.19894 3.18263 3.23886 3.23507 
3.18656 3.18257 3.19893 3.18263 3.23893 3.23515 
3.18654 3.18256 3.19893 3.18263 3.23895 3.23517 
3.18688 3.18274 3.19895 3.18264 3.23869 3.23487 
3.18675 3.18267 3.19894 3.18264 3.23879 3.23498 
3.18679 3.18269 3.19895 3.18264 3.23875 3.23494 
3.1866 3.18259 3.19893 3.18263 3.2389 3.23512 
3.18655 3.18256 3.19893 3.18263 3.23894 3.23517 
3.18652 3.18255 3.19893 3.18263 3.23896 3.23519 
3.18656 3.18257 3.19893 3.18263 3.23893 3.23515 
3.18656 3.18257 3.19893 3.18263 3.23894 3.23516 
3.18688 3.18274 3.19895 3.18264 3.23869 3.23487 
3.18698 3.1828 3.19896 3.18265 3.23861 3.23477

マッピングファイル:

machine-learning - CNTK からの予期しない結果

回答 1 件

Related

Reference