Python27Logistic

Python2

Python5 pandas()

Python6 pandas(01)

Python7 pandas(02)

Python8 pandas(03)

Python9 pandas(04)

Python10 matplotlib()

Python11 matplotlib()

Python12 matplotlib()

Python13 matplotlib()

Python14 matplotlib()

Python15 matplotlib()

Python16 matplotlib()

Python17 matplotlib()

Python18 matplotlib()

Python19 matplotlib()

Python20

Python21

Python22

Python23

Python24LASSO

Python25LASSO

Python26Logistic

PythonRLogisticLogistic0.5P0.5

GitHubLogistic

GenderGenderR

User IDGenderOKLogistic

7LogisticMaleMaleLogistic

AICMale

1.25110000

prob = logistic2.predict(exog = X_test.drop(

cm = metrics.confusion_matrix(y_test, pred, labels=[

accuracy = cm.diagonal().sum()/cm.sum()
accuracy

0.50.5ROC

fpr, tpr, _ = metrics.roc_curve(y_test, pred)
df = pd.DataFrame(dict(fpr=fpr, tpr=tpr))
ggplot(df, aes(x=

) +\ geom_line() +\ geom_abline(linetype=

RPythonggplot2ROCAUC0.85AUC0.8

PythonLogisticRRR

purchase <- read.csv(file = file.choose())
purchase$Purchased = factor(purchase$Purchased)

*nrow(purchase))
train = purchase[idx,]
test = purchase[-idx,]

logit = glm(Purchased ~ ., data = train[,-

logit2 = glm(Purchased ~ ., data = train2, family = binomial(link =

prob = predict(logit2, newdata = test2) pred = factor(ifelse(prob =

ROC = roc(factor(test2$Purchased, levels=c(

-ROC$specificities, y = ROC$sensitivities) ggplot(df, aes(x = x, y = y)) + geom_area(alpha=

) + geom_line() + geom_abline(linetype=

))) + theme(plot.title = element_text(hjust =

OKPythonRLogistic

PythonPython

4.Spark2.X+Python