一些学习算法实现练习

PLA 感知机学习算法

参考资料:

  • 写给大家看的机器学习书(第三篇)

    # -*- coding: utf-8 -*-
    import numpy as np
    
    # 将列表转为列向量
    def vector(list):
        """Convert a flat sequence of numbers into a column vector.

        Args:
            list: Flat sequence of ``n`` numbers. The name shadows the
                builtin; it is kept so existing callers are unaffected.

        Returns:
            An ``n x 1`` ``numpy.matrix`` holding the values in order.
        """
        # np.asmatrix is the function the old np.mat alias pointed to; the
        # alias itself was removed in NumPy 2.0, so call the real name.
        return np.asmatrix(list).transpose()
    
    # 向x向量添加x0=1 将data格式化为(x,y)的格式
    def pre_process_data(list):
        """Turn raw ``[x1, x2, y]`` rows into ``(x, y)`` training pairs.

        A constant component ``x0 = 1`` is prepended to every feature
        vector so the bias term is carried inside the weight vector.

        Args:
            list: Iterable of rows indexable as ``row[0]`` and ``row[1]``
                (the two features) and ``row[2]`` (the label). The name
                shadows the builtin; it is kept for caller compatibility.

        Returns:
            A list of ``(x, y)`` tuples where ``x`` is the column vector
            ``[1, row[0], row[1]]`` built by ``vector`` and ``y`` is
            ``row[2]`` unchanged.
        """
        return [(vector([1, row[0], row[1]]), row[2]) for row in list]
    
    # 符号函数 
    def sign(x):
        """Return the sign of ``x`` as ``1`` or ``-1``.

        Args:
            x: Any value comparable with ``0``.

        Returns:
            ``1`` when ``x`` is strictly positive, ``-1`` otherwise. Zero
            therefore maps to ``-1``; there is no ``0`` result.
        """
        return 1 if x > 0 else -1
    
    # 以w为参数 遍历所有训练数据 判断训练数据中是否有点被切分错误
    def go_through_all_training_data(training_data, w):
        """Look for the first training sample that ``w`` misclassifies.

        Args:
            training_data: Iterable of ``(x, y)`` pairs, where ``x`` is a
                column vector with as many rows as ``w`` and ``y`` is the
                sample's label.
            w: Weight column vector.

        Returns:
            ``('NO', x, y)`` for the first sample whose predicted sign
            ``sign(w^T x)`` differs from ``sign(y)``, or
            ``('YES', None, None)`` when no sample is misclassified
            (including when ``training_data`` is empty).
        """
        for x, y in training_data:
            # w^T x is a 1x1 product; pull out its single scalar entry.
            score = (w.transpose() * x)[0, 0]
            if sign(score) != sign(y):
                return ('NO', x, y)
        return ('YES', None, None)
    
    # 感知机学习算法 主函数
    def PLA(training_data, initial_w=None, max_iterations=None):
        """Run the perceptron learning algorithm (PLA).

        Starting from an initial weight vector, repeatedly find a
        misclassified sample ``(x, y)`` and correct the weights with
        ``w = w + y * x`` until no sample is misclassified.

        Args:
            training_data: Re-iterable collection (e.g. a list) of
                ``(x, y)`` pairs, where ``x`` is a column vector and ``y``
                is the label. It is scanned once per correction.
            initial_w: Optional starting weights, either a flat sequence
                or a column vector. Defaults to ``[1, 2127, 205]``, the
                starting point this function has always used.
            max_iterations: Optional cap on the number of weight
                corrections. ``None`` (the default) loops until the data
                is separated, which never terminates when the data is not
                linearly separable.

        Returns:
            The learned weight column vector (``numpy.matrix``).

        Raises:
            RuntimeError: If ``max_iterations`` is given and that many
                corrections were applied without separating the data.
        """
        # Step 1: initialise the weight vector w.
        if initial_w is None:
            initial_w = [1, 2127, 205]
        # reshape(-1, 1) yields a column for both flat and column inputs.
        w = np.asmatrix(initial_w).reshape(-1, 1)

        corrections = 0
        while True:
            status, x, y = go_through_all_training_data(training_data, w)
            if status == 'YES':
                # Step 2: every sample is classified correctly; done.
                return w
            if max_iterations is not None and corrections >= max_iterations:
                raise RuntimeError(
                    'PLA did not converge within %d corrections'
                    % max_iterations
                )
            # Step 3: nudge w toward the misclassified sample.
            w = w + y * x
            corrections += 1
    
    if __name__ == '__main__':
        # Training data: each row is [x1, x2, label], label being +1 or -1.
        training_data = [
            [10, 300, -1],
            [15, 377, -1],
            [50, 137, 1],
            [65, 92, 1],
            [45, 528, -1],
            [61, 542, 1],
            [26, 394, -1],
            [37, 703, -1],
            [39, 244, 1],
            [41, 398, 1],
            [53, 495, 1],
            [32, 119, 1],
            [24, 577, -1],
            [56, 412, 1],
        ]

        # Convert rows to (x, y) pairs with the bias component x0 = 1 added.
        formatted_training_data = pre_process_data(training_data)

        # Learn the separating weights and show them.
        w = PLA(formatted_training_data)

        print(w)