from statistics import mean
import math
import ast
def test(w, b, fill_values=None):
    """Read unlabeled rows from stdin and print a 0/1 prediction for each.

    The first stdin line holds the number of rows. Each following line is
    comma-separated; its first field is ignored and the remaining fields are
    feature values. A field that parses to NaN (e.g. the literal ``NaN``)
    is treated as missing.

    Args:
        w: Sequence of weights, indexed by feature column.
        b: Bias term added to the weighted sum.
        fill_values: Optional sequence of per-column replacements for missing
            values. When omitted, a missing value is replaced by 0.0, i.e. it
            contributes nothing to the weighted sum.

    Raises:
        ValueError: If the row count or a feature field is not numeric.
        IndexError: If a row has more feature fields than ``w`` has weights.
    """
    row_count = int(input())
    for _ in range(row_count):
        fields = input().split(",")[1:]
        logit = b
        for col, field in enumerate(fields):
            value = float(field)
            if math.isnan(value):
                # Previously a missing value stayed a string and crashed the
                # multiplication below with a TypeError.
                value = fill_values[col] if fill_values is not None else 0.0
            logit += w[col] * value
        try:
            probability = 1 / (1 + math.exp(-logit))
        except OverflowError:
            # exp() overflows only for hugely negative logits, where the
            # sigmoid is indistinguishable from 0.
            probability = 0.0
        print(1 if probability > 0.5 else 0)
def read_data():
    """Read the labeled training rows from stdin.

    The first stdin line holds the number of rows. Each following line is
    comma-separated: the first field is skipped, the last field is the label
    and everything in between is a feature value.

    Rows whose label (after stripping surrounding whitespace) is not exactly
    ``0`` or ``1`` are dropped. Feature values are converted to ``float``;
    any value that parses to NaN (``NaN``, ``nan``, `` NaN`` ...) is stored as
    the string marker ``"NaN"`` so that later code can detect missing values
    with a simple equality check.

    Returns:
        A ``(data_matrix, labels)`` tuple: a list of feature rows and the
        matching list of integer labels.

    Raises:
        ValueError: If the row count or a feature field is not numeric.
    """
    row_count = int(input())
    data_matrix = []
    labels = []
    for _ in range(row_count):
        fields = input().split(",")
        # strip() keeps rows with a trailing "\r" or space from being dropped.
        label = fields[-1].strip()
        if label not in ("0", "1"):
            continue
        features = []
        for field in fields[1:-1]:
            value = float(field)
            features.append("NaN" if math.isnan(value) else value)
        data_matrix.append(features)
        labels.append(int(label))
    return data_matrix, labels
def fillnan(data_matrix, valid_ranges=None):
    """Replace missing and out-of-range feature values in place.

    For every column the mean and median are computed over the "valid"
    entries, i.e. those that are neither the ``"NaN"`` marker nor outside the
    column's allowed range. Then each ``"NaN"`` entry is replaced by the
    column mean and each out-of-range entry by the column median.

    Args:
        data_matrix: List of feature rows (lists); modified in place. The
            column count is taken from the first row.
        valid_ranges: Optional mapping ``column -> (low, high)`` of inclusive
            bounds; ``None`` for a bound means unbounded. Columns absent from
            the mapping are not range-checked. Defaults to: columns 0 and 1
            must be >= 0, columns 2 and 3 within [0, 1000], column 4 within
            [0, 20].

    Returns:
        The same ``data_matrix`` object, after filling.
    """
    from statistics import mean, median

    if valid_ranges is None:
        valid_ranges = {
            0: (0, None),
            1: (0, None),
            2: (0, 1000),
            3: (0, 1000),
            4: (0, 20),
        }
    if not data_matrix:
        return data_matrix

    def out_of_range(col, value):
        low, high = valid_ranges.get(col, (None, None))
        # Either bound being violated disqualifies the value; the previous
        # "< 0 and > 1000" test for columns 2 and 3 could never be true.
        if low is not None and value < low:
            return True
        return high is not None and value > high

    for col in range(len(data_matrix[0])):
        valid = [
            row[col]
            for row in data_matrix
            if row[col] != "NaN" and not out_of_range(col, row[col])
        ]
        # With no valid entry there is nothing to estimate from; fall back to
        # 0.0 instead of letting statistics raise StatisticsError.
        col_mean = mean(valid) if valid else 0.0
        col_median = median(valid) if valid else 0.0
        for row in data_matrix:
            value = row[col]
            if value == "NaN":
                row[col] = col_mean
            elif out_of_range(col, value):
                row[col] = col_median
    return data_matrix
def train(data_matrix, labels, iter=100, lr=0.001):
    """Fit logistic-regression weights with full-batch gradient descent.

    Args:
        data_matrix: List of numeric feature rows; the feature count is taken
            from the first row.
        labels: List of 0/1 labels, one per row of ``data_matrix``.
        iter: Number of passes over the whole data set.
        lr: Learning rate applied to the summed (not averaged) gradient.

    Returns:
        A ``(w, b)`` tuple: the list of per-feature weights and the bias.
        For an empty ``data_matrix`` the weights are five zeros and the bias
        is 0.0.
    """
    # Size the model from the data instead of assuming exactly 5 features;
    # keep the historical 5-weight result when there is no data at all.
    n_features = len(data_matrix[0]) if data_matrix else 5
    w = [0.0] * n_features
    b = 0.0
    for _ in range(iter):
        grad_b = 0.0
        grad_w = [0.0] * n_features
        for j, row in enumerate(data_matrix):
            label = labels[j]
            logit = b
            for k in range(n_features):
                logit += w[k] * row[k]
            try:
                y = 1 / (1 + math.exp(-logit))
            except OverflowError:
                # exp() overflows only for hugely negative logits, where the
                # sigmoid is indistinguishable from 0.
                y = 0.0
            error = y - label
            grad_b += error
            # Positive samples count double in the weight gradient.
            # NOTE(review): the bias gradient is not weighted the same way;
            # kept as-is to preserve training results - confirm intent.
            scale = 2 if label == 1 else 1
            for k in range(n_features):
                grad_w[k] += error * row[k] * scale
        b -= lr * grad_b
        for k in range(n_features):
            w[k] -= lr * grad_w[k]
    return w, b
def main():
    """Run the whole pipeline: load, clean, fit, then predict from stdin."""
    features, targets = read_data()
    weights, bias = train(fillnan(features), targets)
    test(weights, bias)
# Run the pipeline only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()