{"id":1807,"date":"2026-05-28T22:22:21","date_gmt":"2026-05-28T14:22:21","guid":{"rendered":"https:\/\/cabit.top\/?p=1807"},"modified":"2026-05-28T22:24:51","modified_gmt":"2026-05-28T14:24:51","slug":"machine-learning-with-real-world-data","status":"publish","type":"post","link":"https:\/\/cabit.top\/?p=1807","title":{"rendered":"\u4f7f\u7528\u771f\u5b9e\u4e16\u754c\u6570\u636e\u8fdb\u884c\u673a\u5668\u5b66\u4e60"},"content":{"rendered":"\n<p class=\"wp-block-paragraph\"><\/p>\n\n\n\n<p class=\"wp-block-paragraph\">\u4f5c\u8005\uff1aEric \u00b7 \u53d1\u8868\u4e8e 2023\u5e745\u670815\u65e5 \u00b7 \u66f4\u65b0\u4e8e 2024\u5e741\u670825\u65e5<\/p>\n\n\n\n<hr class=\"wp-block-separator has-alpha-channel-opacity\"\/>\n\n\n\n<h3 class=\"wp-block-heading\">\u6587\u7ae0\u76ee\u5f55<\/h3>\n\n\n\n<ul class=\"wp-block-list\">\n<li><a href=\"#introduction\">\u5f15\u8a00<\/a><\/li>\n\n\n\n<li><a href=\"#background\">\u80cc\u666f<\/a>\n<ul class=\"wp-block-list\">\n<li><a href=\"#our-data\">\u6211\u4eec\u7684\u6570\u636e<\/a><\/li>\n\n\n\n<li><a href=\"#gauss-machine-learning\">GAUSS \u673a\u5668\u5b66\u4e60<\/a><\/li>\n<\/ul>\n<\/li>\n\n\n\n<li><a href=\"#data-exploration-and-cleaning\">\u6570\u636e\u63a2\u7d22\u4e0e\u6e05\u6d17<\/a>\n<ul class=\"wp-block-list\">\n<li><a href=\"#descriptive-statistics\">\u63cf\u8ff0\u6027\u7edf\u8ba1<\/a><\/li>\n\n\n\n<li><a href=\"#missing-values\">\u7f3a\u5931\u503c\u5904\u7406<\/a><\/li>\n\n\n\n<li><a href=\"#outliers\">\u5f02\u5e38\u503c\u5904\u7406<\/a><\/li>\n\n\n\n<li><a href=\"#data-truncation\">\u6570\u636e\u622a\u65ad<\/a><\/li>\n\n\n\n<li><a href=\"#feature-modifications\">\u7279\u5f81\u4fee\u6539<\/a><\/li>\n<\/ul>\n<\/li>\n\n\n\n<li><a href=\"#data-splitting\">\u6570\u636e\u5212\u5206<\/a><\/li>\n\n\n\n<li><a href=\"#fitting-our-model\">\u62df\u5408\u6a21\u578b<\/a>\n<ul class=\"wp-block-list\">\n<li><a href=\"#model-fitting\">\u6a21\u578b\u62df\u5408<\/a><\/li>\n\n\n\n<li><a 
href=\"#prediction\">\u9884\u6d4b<\/a><\/li>\n<\/ul>\n<\/li>\n\n\n\n<li><a href=\"#feature-engineering\">\u7279\u5f81\u5de5\u7a0b<\/a>\n<ul class=\"wp-block-list\">\n<li><a href=\"#fit-and-predict-the-new-model\">\u62df\u5408\u4e0e\u9884\u6d4b\u65b0\u6a21\u578b<\/a><\/li>\n<\/ul>\n<\/li>\n\n\n\n<li><a href=\"#extensions\">\u6269\u5c55\u4e0e\u5ef6\u4f38<\/a>\n<ul class=\"wp-block-list\">\n<li><a href=\"#conclusion\">\u7ed3\u8bba<\/a><\/li>\n\n\n\n<li><a href=\"#further-machine-learning-reading\">\u5ef6\u4f38\u9605\u8bfb<\/a><\/li>\n<\/ul>\n<\/li>\n<\/ul>\n\n\n\n<h3 class=\"wp-block-heading\">\u5f15\u8a00<\/h3>\n\n\n\n<p class=\"wp-block-paragraph\">\u5982\u679c\u60a8\u66fe\u4ece\u4e8b\u8fc7\u5b9e\u8bc1\u5de5\u4f5c\uff0c\u5c31\u4f1a\u77e5\u9053\u771f\u5b9e\u4e16\u754c\u7684\u6570\u636e\u5f88\u5c11\u4f1a\u4ee5\u5e72\u51c0\u3001\u53ef\u76f4\u63a5\u7528\u4e8e\u5efa\u6a21\u7684\u5f62\u5f0f\u5448\u73b0\u3002\u6ca1\u6709\u4efb\u4f55\u6570\u636e\u5206\u6790\u9879\u76ee\u4ec5\u4ec5\u5305\u62ec\u62df\u5408\u6a21\u578b\u548c\u505a\u51fa\u9884\u6d4b\u3002\u5728\u672c\u7bc7\u535a\u5ba2\u4e2d\uff0c\u6211\u4eec\u5c06\u4ece\u5934\u5230\u5c3e\u8d70\u5b8c\u4e00\u4e2a\u5b8c\u6574\u7684\u673a\u5668\u5b66\u4e60\u9879\u76ee\uff0c\u4e3a\u60a8\u63d0\u4f9b\u5728 GAUSS \u4e2d\u5b8c\u6210\u81ea\u5df1\u673a\u5668\u5b66\u4e60\u9879\u76ee\u7684\u57fa\u7840\uff0c\u6db5\u76d6\u4ee5\u4e0b\u5185\u5bb9\uff1a<\/p>\n\n\n\n<ul class=\"wp-block-list\">\n<li>\u6570\u636e\u63a2\u7d22\u4e0e\u6e05\u6d17<\/li>\n\n\n\n<li>\u8bad\u7ec3\u96c6\u548c\u6d4b\u8bd5\u96c6\u7684\u5212\u5206<\/li>\n\n\n\n<li>\u6a21\u578b\u62df\u5408\u4e0e\u9884\u6d4b<\/li>\n\n\n\n<li>\u57fa\u7840\u7279\u5f81\u5de5\u7a0b<\/li>\n<\/ul>\n\n\n\n<h2 class=\"wp-block-heading\">\u80cc\u666f<\/h2>\n\n\n\n<h3 class=\"wp-block-heading\">\u6211\u4eec\u7684\u6570\u636e<\/h3>\n\n\n\n<p class=\"wp-block-paragraph\">\u4eca\u5929\u6211\u4eec\u5c06\u4f7f\u7528&nbsp;<a href=\"https:\/\/www.kaggle.com\/datasets\/camnugent\/california-housing-prices\" 
target=\"_blank\" rel=\"noreferrer noopener\">Kaggle \u7684\u52a0\u5229\u798f\u5c3c\u4e9a\u623f\u4ef7\u6570\u636e\u96c6<\/a>\u3002\u8be5\u6570\u636e\u96c6\u57fa\u4e8e 1990 \u5e74\u7684\u4eba\u53e3\u666e\u67e5\u6570\u636e\u6784\u5efa\u3002\u867d\u7136\u5b83\u8f83\u8001\uff0c\u4f46\u4f5c\u4e3a\u6f14\u793a\u6570\u636e\u96c6\u975e\u5e38\u51fa\u8272\uff0c\u5e76\u5728\u8bb8\u591a\u673a\u5668\u5b66\u4e60\u793a\u4f8b\u4e2d\u5e7f\u53d7\u6b22\u8fce\u3002<\/p>\n\n\n\n<p class=\"wp-block-paragraph\">\u8be5\u6570\u636e\u96c6\u5305\u542b\u5728\u52a0\u5229\u798f\u5c3c\u4e9a\u5dde\u533a\u5757\u7ec4\u7ea7\u522b\u6d4b\u91cf\u7684 10 \u4e2a\u53d8\u91cf\uff1a<\/p>\n\n\n\n<figure class=\"wp-block-table\"><table class=\"has-fixed-layout\"><tbody><tr><th>\u53d8\u91cf<\/th><th>\u63cf\u8ff0<\/th><\/tr><tr><td>longitude\uff08\u7ecf\u5ea6\uff09<\/td><td>\u623f\u5c4b\u5411\u897f\u8ddd\u79bb\u7684\u5ea6\u91cf<\/td><\/tr><tr><td>latitude\uff08\u7eac\u5ea6\uff09<\/td><td>\u623f\u5c4b\u5411\u5317\u8ddd\u79bb\u7684\u5ea6\u91cf<\/td><\/tr><tr><td>housing_median_age\uff08\u623f\u5c4b\u4e2d\u4f4d\u5e74\u9f84\uff09<\/td><td>\u533a\u5757\u5185\u623f\u5c4b\u7684\u4e2d\u4f4d\u5e74\u9f84<\/td><\/tr><tr><td>total_rooms\uff08\u603b\u623f\u95f4\u6570\uff09<\/td><td>\u533a\u5757\u5185\u623f\u95f4\u603b\u6570<\/td><\/tr><tr><td>total_bedrooms\uff08\u603b\u5367\u5ba4\u6570\uff09<\/td><td>\u533a\u5757\u5185\u5367\u5ba4\u603b\u6570<\/td><\/tr><tr><td>population\uff08\u4eba\u53e3\uff09<\/td><td>\u533a\u5757\u5185\u5c45\u4f4f\u603b\u4eba\u6570<\/td><\/tr><tr><td>households\uff08\u5bb6\u5ead\u6570\uff09<\/td><td>\u533a\u5757\u5185\u5bb6\u5ead\u603b\u6570\uff08\u4e00\u7fa4\u5c45\u4f4f\u5728\u4e00\u4e2a\u4f4f\u5b85\u5355\u5143\u5185\u7684\u4eba\uff09<\/td><\/tr><tr><td>median_income\uff08\u4e2d\u4f4d\u6536\u5165\uff09<\/td><td>\u533a\u5757\u5185\u5bb6\u5ead\u7684\u4e2d\u4f4d\u6536\u5165\uff08\u4ee5\u4e07\u7f8e\u5143\u8ba1\uff09<\/td><\/tr><tr><td>median_house_value\uff08\u623f\u5c4b\u4e2d\u4f4d\u4ef7\u503c\uff09<\/td><
td>\u533a\u5757\u5185\u5bb6\u5ead\u7684\u4e2d\u4f4d\u623f\u5c4b\u4ef7\u503c<\/td><\/tr><tr><td>ocean_proximity\uff08\u6d77\u6d0b\u8ddd\u79bb\uff09<\/td><td>\u623f\u5c4b\u76f8\u5bf9\u4e8e\u6d77\u6d0b\/\u6d77\u7684\u4f4d\u7f6e<\/td><\/tr><\/tbody><\/table><\/figure>\n\n\n\n<h3 class=\"wp-block-heading\">GAUSS \u673a\u5668\u5b66\u4e60<\/h3>\n\n\n\n<p class=\"wp-block-paragraph\">\u6211\u4eec\u5c06\u4f7f\u7528\u5168\u65b0\u7684&nbsp;<a href=\"https:\/\/docs.aptech.com\/gauss\/gml-landing.html\" target=\"_blank\" rel=\"noreferrer noopener\">GAUSS \u673a\u5668\u5b66\u4e60\uff08GML\uff09\u5e93<\/a>\u3002\u8fd9\u4e2a\u5e93\u975e\u5e38\u7528\u6237\u53cb\u597d\uff0c\u63d0\u4f9b\u6613\u4e8e\u4f7f\u7528\u7684\u673a\u5668\u5b66\u4e60\u5de5\u5177\u6765\u5b9e\u73b0\u57fa\u7840\u673a\u5668\u5b66\u4e60\u6a21\u578b\u3002\u8981\u4f7f\u7528\u8fd9\u4e9b\u5de5\u5177\uff0c\u9700\u8981\u52a0\u8f7d\u8be5\u5e93\uff1a<\/p>\n\n\n\n<pre class=\"wp-block-preformatted\">\/\/ Clear workspace and load library\nnew;\nlibrary gml;\n\n\/\/ Set random seed\nrndseed 8906876;<\/pre>\n\n\n\n<p class=\"wp-block-paragraph\">\u6ce8\u610f\uff1a\u6211\u4eec\u8bbe\u7f6e\u4e86\u968f\u673a\u79cd\u5b50\u4ee5\u786e\u4fdd\u7ed3\u679c\u53ef\u590d\u73b0\u3002<\/p>\n\n\n\n<h2 class=\"wp-block-heading\">\u6570\u636e\u63a2\u7d22\u4e0e\u6e05\u6d17<\/h2>\n\n\n\n<p class=\"wp-block-paragraph\">\u52a0\u8f7d GML \u540e\uff0c\u6211\u4eec\u51c6\u5907\u5bfc\u5165\u548c<a href=\"https:\/\/www.aptech.com\/blog\/preparing-and-cleaning-data-fred-data-in-gauss\/\" target=\"_blank\" rel=\"noreferrer noopener\">\u6e05\u6d17\u6570\u636e<\/a>\u3002\u7b2c\u4e00\u6b65\u662f\u4f7f\u7528&nbsp;<code>loadd<\/code>&nbsp;\u8fc7\u7a0b\u5c06\u6570\u636e\u5bfc\u5165 GAUSS\u3002<\/p>\n\n\n\n<pre class=\"wp-block-preformatted\">\/*\n** Import datafile\n*\/\nload_path = \"data\/\";\nfname = \"housing.csv\";\n\n\/\/ Load all variables\nhousing_data = loadd(load_path $+ fname);<\/pre>\n\n\n\n<h3 
class=\"wp-block-heading\">\u63cf\u8ff0\u6027\u7edf\u8ba1<\/h3>\n\n\n\n<p class=\"wp-block-paragraph\"><a href=\"https:\/\/docs.aptech.com\/gauss\/data-management\/data-exploration.html\" target=\"_blank\" rel=\"noreferrer noopener\">\u63a2\u7d22\u6027\u6570\u636e\u5206\u6790<\/a>\u4f7f\u6211\u4eec\u80fd\u591f\u8bc6\u522b\u91cd\u8981\u7684\u6570\u636e\u5f02\u5e38\uff0c\u5982\u5f02\u5e38\u503c\u548c\u7f3a\u5931\u503c\u3002\u8ba9\u6211\u4eec\u5148\u4f7f\u7528&nbsp;<code>dstatmt<\/code>&nbsp;\u8fc7\u7a0b\u67e5\u770b\u6807\u51c6\u7684\u63cf\u8ff0\u6027\u7edf\u8ba1\uff1a<\/p>\n\n\n\n<pre class=\"wp-block-preformatted\">\/\/ Find descriptive statistics\n\/\/ for all variables in housing_data\ndstatmt(housing_data);<\/pre>\n\n\n\n<p class=\"wp-block-paragraph\">\u8fd9\u4f1a\u6253\u5370\u51fa\u6240\u6709\u53d8\u91cf\u7684\u7edf\u8ba1\u6c47\u603b\u8868\u3002<\/p>\n\n\n\n<pre class=\"wp-block-preformatted\">--------------------------------------------------------------------------------------------------\nVariable                  Mean     Std Dev      Variance     Minimum     Maximum     Valid Missing\n--------------------------------------------------------------------------------------------------\nlongitude               -119.6       2.004         4.014      -124.3      -114.3     20640    0\nlatitude                 35.63       2.136         4.562       32.54       41.95     20640    0\nhousing_median_age       28.64       12.59         158.4           1          52     20640    0\ntotal_rooms               2636        2182     4.759e+06           2   3.932e+04     20640    0\ntotal_bedrooms           537.9       421.4     1.776e+05           1        6445     20433  207\npopulation                1425        1132     1.282e+06           3   3.568e+04     20640    0\nhouseholds               499.5       382.3     1.462e+05           1        6082     20640    0\nmedian_income            3.871         1.9         3.609      0.4999          15     20640    
0\nmedian_house_value   2.069e+05   1.154e+05     1.332e+10     1.5e+04       5e+05     20640    0\nocean_proximity          -----       -----         -----   &lt;1H OCEAN  NEAR OCEAN     20640    0<\/pre>\n\n\n\n<p class=\"wp-block-paragraph\">\u8fd9\u4e9b\u7edf\u8ba1\u91cf\u4f7f\u6211\u4eec\u80fd\u591f\u5feb\u901f\u8bc6\u522b\u5728\u62df\u5408\u6a21\u578b\u4e4b\u524d\u9700\u8981\u5904\u7406\u7684\u51e0\u4e2a\u6570\u636e\u95ee\u9898\uff1a<\/p>\n\n\n\n<ol class=\"wp-block-list\">\n<li><code>total_bedrooms<\/code>\u00a0\u53d8\u91cf\u5b58\u5728 207 \u4e2a\u7f3a\u5931\u89c2\u6d4b\u503c\u3002<\/li>\n\n\n\n<li>\u8bb8\u591a\u53d8\u91cf\u663e\u793a\u51fa\u6f5c\u5728\u7684\u5f02\u5e38\u503c\uff0c\u65b9\u5dee\u5927\u4e14\u8303\u56f4\u5e7f\u3002\u9700\u8981\u8fdb\u4e00\u6b65\u63a2\u7d22\u3002<\/li>\n<\/ol>\n\n\n\n<h3 class=\"wp-block-heading\">\u7f3a\u5931\u503c\u5904\u7406<\/h3>\n\n\n\n<p class=\"wp-block-paragraph\">\u4e3a\u4e86\u66f4\u597d\u5730\u4e86\u89e3\u5982\u4f55\u5904\u7406\u7f3a\u5931\u503c\uff0c\u8ba9\u6211\u4eec\u5206\u522b\u68c0\u67e5\u6709\u7f3a\u5931\u503c\u548c\u6ca1\u6709\u7f3a\u5931\u503c\u7684\u89c2\u6d4b\u503c\u7684\u63cf\u8ff0\u6027\u7edf\u8ba1\u3002<\/p>\n\n\n\n<pre class=\"wp-block-preformatted\">\/\/ Conditional check for missing values\ne = housing_data[., \"total_bedrooms\"] .== miss();\n\n\/\/ Get descriptive statistics for dataset with missing values\ndstatmt(selif(housing_data, e));<\/pre>\n\n\n\n<pre class=\"wp-block-preformatted\">------------------------------------------------------------------------------------------------\nVariable                 Mean     Std Dev      Variance     Minimum     Maximum   Valid  Missing\n------------------------------------------------------------------------------------------------\nlongitude              -119.5       2.001         4.006      -124.1      -114.6      207    0\nlatitude                 35.5       2.097         4.399       32.66       40.92      207    0\nhousing_median_age      29.27       
11.96         143.2           4          52      207    0\ntotal_rooms              2563        1787     3.194e+06         154   1.171e+04      207    0\ntotal_bedrooms          -----       -----         -----        +INF        -INF        0  207\npopulation               1478        1057     1.118e+06          37        7604      207    0\nhouseholds                510       386.1     1.491e+05          16        3589      207    0\nmedian_income           3.822       1.956         3.824      0.8527          15      207    0\nmedian_house_value   2.06e+05   1.116e+05     1.246e+10    4.58e+04       5e+05      207    0\nocean_proximity         -----       -----         -----   &lt;1H OCEAN  NEAR OCEAN      207    0<\/pre>\n\n\n\n<p class=\"wp-block-paragraph\">\u901a\u8fc7\u76ee\u89c6\u68c0\u67e5\uff0c\u6709\u7f3a\u5931\u503c\u7684\u6570\u636e\u7684\u63cf\u8ff0\u6027\u7edf\u8ba1\u4e0e\u6ca1\u6709\u7f3a\u5931\u503c\u7684\u6570\u636e\u7684\u63cf\u8ff0\u6027\u7edf\u8ba1\u975e\u5e38\u76f8\u4f3c\u3002\u6b64\u5916\uff0c\u7f3a\u5931\u503c\u5360\u603b\u89c2\u6d4b\u503c\u7684\u6bd4\u4f8b\u4e0d\u5230 1%\u3002\u56e0\u6b64\uff0c\u6211\u4eec\u5c06\u5220\u9664\u5305\u542b\u7f3a\u5931\u503c\u7684\u884c\uff0c\u800c\u4e0d\u662f<a href=\"https:\/\/www.aptech.com\/blog\/introduction-to-handling-missing-values\/\" target=\"_blank\" rel=\"noreferrer noopener\">\u63d2\u8865\u7f3a\u5931\u503c<\/a>\u3002<\/p>\n\n\n\n<p class=\"wp-block-paragraph\">\u6211\u4eec\u53ef\u4ee5\u4f7f\u7528&nbsp;<code>packr<\/code>&nbsp;\u8fc7\u7a0b\u5220\u9664\u5305\u542b\u7f3a\u5931\u503c\u7684\u884c\uff1a<\/p>\n\n\n\n<pre class=\"wp-block-preformatted\">\/\/ Remove rows with missing values from housing_data\nhousing_data = packr(housing_data);<\/pre>\n\n\n\n<h3 class=\"wp-block-heading\">\u5f02\u5e38\u503c\u5904\u7406<\/h3>\n\n\n\n<p 
class=\"wp-block-paragraph\">\u5220\u9664\u7f3a\u5931\u503c\u540e\uff0c\u8ba9\u6211\u4eec\u5bfb\u627e\u5176\u4ed6\u6570\u636e\u5f02\u5e38\u503c\u3002\u6570\u636e\u53ef\u89c6\u5316\u5982\u76f4\u65b9\u56fe\u548c\u7bb1\u7ebf\u56fe\u662f\u8bc6\u522b\u6f5c\u5728\u5f02\u5e38\u503c\u7684\u597d\u65b9\u6cd5\u3002\u9996\u5148\uff0c\u4e3a\u6240\u6709\u8fde\u7eed\u53d8\u91cf\u521b\u5efa\u4e00\u4e2a\u76f4\u65b9\u56fe\u7f51\u683c\u56fe\uff1a<\/p>\n\n\n\n<pre class=\"wp-block-preformatted\">\/*\n** Data visualizations\n*\/\n\/\/ Get variables names\nvars = getColNames(housing_data);\n\n\/\/ Set up plotControl structure for formatting graphs\nstruct plotControl plt;\nplt = plotGetDefaults(\"bar\");\n\n\/\/ Set fonts\nplotSetFonts(&amp;plt, \"title\", \"Arial\", 14);\nplotSetFonts(&amp;plt, \"ticks\", \"Arial\", 12);\n\n\/\/ Loop through the variables and draw histograms\nfor i(1, rows(vars)-1, 1);\n    plotSetTitle(&amp;plt, vars[i]);\n    plotLayout(3, 3, i);\n    plotHist(plt, housing_data[., vars[i]], 50);\nendfor;<\/pre>\n\n\n\n<figure class=\"wp-block-image size-large\"><img loading=\"lazy\" decoding=\"async\" width=\"1024\" height=\"683\" src=\"https:\/\/cabit.top\/wp-content\/uploads\/2026\/05\/image-26-1024x683.png\" alt=\"\" class=\"wp-image-1809\" srcset=\"https:\/\/cabit.top\/wp-content\/uploads\/2026\/05\/image-26-1024x683.png 1024w, https:\/\/cabit.top\/wp-content\/uploads\/2026\/05\/image-26-300x200.png 300w, https:\/\/cabit.top\/wp-content\/uploads\/2026\/05\/image-26-768x512.png 768w, https:\/\/cabit.top\/wp-content\/uploads\/2026\/05\/image-26-1536x1024.png 1536w, https:\/\/cabit.top\/wp-content\/uploads\/2026\/05\/image-26-2048x1365.png 2048w, https:\/\/cabit.top\/wp-content\/uploads\/2026\/05\/image-26-670x447.png 670w\" sizes=\"auto, (max-width: 1024px) 100vw, 1024px\" \/><\/figure>\n\n\n\n<p class=\"wp-block-paragraph\">\u4ece\u76f4\u65b9\u56fe\u53ef\u4ee5\u770b\u51fa\uff0c\u51e0\u4e2a\u53d8\u91cf\u5b58\u5728\u5f02\u5e38\u503c\u95ee\u9898\uff1a<\/p>\n\n\n\n<ul 
class=\"wp-block-list\">\n<li><code>total_rooms<\/code>\u00a0\u53d8\u91cf\uff0c\u5927\u90e8\u5206\u6570\u636e\u5206\u5e03\u5728 0 \u5230 10,000 \u4e4b\u95f4\u3002<\/li>\n\n\n\n<li><code>total_bedrooms<\/code>\u00a0\u53d8\u91cf\uff0c\u5927\u90e8\u5206\u6570\u636e\u5206\u5e03\u5728 0 \u5230 2,000 \u4e4b\u95f4\u3002<\/li>\n\n\n\n<li><code>households<\/code>\u00a0\u53d8\u91cf\uff0c\u5927\u90e8\u5206\u6570\u636e\u5206\u5e03\u5728 0 \u5230 2,000 \u4e4b\u95f4\u3002<\/li>\n\n\n\n<li><code>population<\/code>\u00a0\u53d8\u91cf\uff0c\u5927\u90e8\u5206\u6570\u636e\u5206\u5e03\u5728 0 \u5230 100,000 \u4e4b\u95f4\u3002<\/li>\n<\/ul>\n\n\n\n<p class=\"wp-block-paragraph\">\u8fd9\u4e9b\u53d8\u91cf\u7684\u7bb1\u7ebf\u56fe\u786e\u8ba4\u4e86\u5f02\u5e38\u503c\u7684\u5b58\u5728\u3002<\/p>\n\n\n\n<pre class=\"wp-block-preformatted\">plt = plotGetDefaults(\"box\");\n\n\/\/ Set fonts\nplotSetFonts(&amp;plt, \"title\", \"Arial\", 14);\nplotSetFonts(&amp;plt, \"ticks\", \"Arial\", 12);\n\nstring box_vars = { \"total_rooms\", \"total_bedrooms\", \"households\", \"population\" };\n\n\/\/ Loop through the variables and draw boxplots\nfor i(1, rows(box_vars), 1);\n    plotLayout(2, 2, i);\n    plotBox(plt, box_vars[i], housing_data[., box_vars[i]]);\nendfor;<\/pre>\n\n\n\n<figure class=\"wp-block-image size-large\"><img loading=\"lazy\" decoding=\"async\" width=\"1024\" height=\"683\" src=\"https:\/\/cabit.top\/wp-content\/uploads\/2026\/05\/image-27-1024x683.png\" alt=\"\" class=\"wp-image-1810\" srcset=\"https:\/\/cabit.top\/wp-content\/uploads\/2026\/05\/image-27-1024x683.png 1024w, https:\/\/cabit.top\/wp-content\/uploads\/2026\/05\/image-27-300x200.png 300w, https:\/\/cabit.top\/wp-content\/uploads\/2026\/05\/image-27-768x512.png 768w, https:\/\/cabit.top\/wp-content\/uploads\/2026\/05\/image-27-1536x1024.png 1536w, https:\/\/cabit.top\/wp-content\/uploads\/2026\/05\/image-27-2048x1365.png 2048w, https:\/\/cabit.top\/wp-content\/uploads\/2026\/05\/image-27-670x447.png 670w\" sizes=\"auto, 
(max-width: 1024px) 100vw, 1024px\" \/><\/figure>\n\n\n\n<p class=\"wp-block-paragraph\">\u8ba9\u6211\u4eec\u8fc7\u6ee4\u6570\u636e\u4ee5\u6d88\u9664\u8fd9\u4e9b\u5f02\u5e38\u503c\uff1a<\/p>\n\n\n\n<pre class=\"wp-block-preformatted\">\/*\n** Filter to remove outliers\n**\n** Delete:\n**    - total_rooms greater than or equal to 10000\n**    - total_bedrooms greater than or equal to 2000\n**    - households greater than or equal to 2000\n**    - population greater than or equal to 6000\n*\/\nmask = housing_data[., \"total_rooms\"] .&gt;= 10000;\nmask = mask .or housing_data[., \"total_bedrooms\"] .&gt;= 2000;\nmask = mask .or housing_data[., \"households\"] .&gt;= 2000;\nmask = mask .or housing_data[., \"population\"] .&gt;= 6000;\n\nhousing_data = delif(housing_data, mask);<\/pre>\n\n\n\n<figure class=\"wp-block-image size-large\"><img loading=\"lazy\" decoding=\"async\" width=\"1024\" height=\"683\" src=\"https:\/\/cabit.top\/wp-content\/uploads\/2026\/05\/image-28-1024x683.png\" alt=\"\" class=\"wp-image-1811\" srcset=\"https:\/\/cabit.top\/wp-content\/uploads\/2026\/05\/image-28-1024x683.png 1024w, https:\/\/cabit.top\/wp-content\/uploads\/2026\/05\/image-28-300x200.png 300w, https:\/\/cabit.top\/wp-content\/uploads\/2026\/05\/image-28-768x512.png 768w, https:\/\/cabit.top\/wp-content\/uploads\/2026\/05\/image-28-1536x1024.png 1536w, https:\/\/cabit.top\/wp-content\/uploads\/2026\/05\/image-28-2048x1365.png 2048w, https:\/\/cabit.top\/wp-content\/uploads\/2026\/05\/image-28-670x447.png 670w\" sizes=\"auto, (max-width: 1024px) 100vw, 1024px\" \/><\/figure>\n\n\n\n<p class=\"wp-block-paragraph\">\u6ce8\u610f\uff1a\u6211\u4eec\u91c7\u53d6\u4e86\u4fdd\u5b88\u7684\u5f02\u5e38\u503c\u8fc7\u6ee4\u65b9\u6cd5\uff0c\u5e76\u672a\u5220\u9664\u7bb1\u7ebf\u56fe\u8bc6\u522b\u7684\u6240\u6709\u5f02\u5e38\u6570\u636e\u70b9\u3002<\/p>\n\n\n\n<h3 class=\"wp-block-heading\">\u6570\u636e\u622a\u65ad<\/h3>\n\n\n\n<p 
class=\"wp-block-paragraph\">\u76f4\u65b9\u56fe\u8fd8\u6307\u51fa\u4e86&nbsp;<code>housing_median_age<\/code>&nbsp;\u548c&nbsp;<code>median_house_value<\/code>&nbsp;\u7684\u622a\u65ad\u95ee\u9898\u3002\u8ba9\u6211\u4eec\u8fdb\u4e00\u6b65\u63a2\u7a76\uff1a<\/p>\n\n\n\n<ol class=\"wp-block-list\">\n<li>\u4f7f\u7528\u00a0<code>modec<\/code>\u00a0\u786e\u8ba4\u8fd9\u4e9b\u662f\u6700\u9891\u7e41\u51fa\u73b0\u7684\u89c2\u6d4b\u503c\uff0c\u4e3a\u622a\u65ad\u70b9\u63d0\u4f9b\u8bc1\u636e\u3002<\/li>\n\n\n\n<li>\u7edf\u8ba1\u8fd9\u4e9b\u4f4d\u7f6e\u4e0a\u7684\u89c2\u6d4b\u503c\u6570\u91cf\u3002<\/li>\n<\/ol>\n\n\n\n<pre class=\"wp-block-preformatted\">\/\/ House value\nmode_value = modec(housing_data[., \"median_house_value\"]);\nprint \"Most frequent median_house_value:\" mode_value;\nprint \"Counts:\";\nsumc(housing_data[., \"median_house_value\"] .== mode_value);\n\n\/\/ House age\nmode_age = modec(housing_data[., \"housing_median_age\"]);\nprint \"Most frequent housing_median_age:\" mode_age;\nprint \"Counts:\";\nsumc(housing_data[., \"housing_median_age\"] .== mode_age);<\/pre>\n\n\n\n<pre class=\"wp-block-preformatted\">Most frequent median_house_value:\n       500001.00\nCounts:\n       935.00000\nMost frequent housing_median_age:\n       52.000000\nCounts:\n       1262.0000<\/pre>\n\n\n\n<p class=\"wp-block-paragraph\">\u8fd9\u4e9b\u89c2\u6d4b\u503c\u5408\u8ba1\u5360\u603b\u89c2\u6d4b\u503c\u7684\u7ea6 10%\u3002\u7531\u4e8e\u6211\u4eec\u65e0\u6cd5\u83b7\u5f97\u6709\u5173\u8fd9\u4e9b\u70b9\u7684\u66f4\u591a\u4fe1\u606f\uff0c\u5c06\u5176\u4ece\u6a21\u578b\u4e2d\u5220\u9664\u3002<\/p>\n\n\n\n<pre class=\"wp-block-preformatted\">\/\/ Create binary vector with a 1 if either\n\/\/ 'housing_median_age' or 'median_house_value'\n\/\/ equal their mode value.\nmask = (housing_data[., \"housing_median_age\"] .== mode_age)\n       .or (housing_data[., \"median_house_value\"] .== mode_value);\n\/\/ Delete the rows if they meet our above criteria\nhousing_data = delif(housing_data, 
mask);<\/pre>\n\n\n\n<h3 class=\"wp-block-heading\">\u7279\u5f81\u4fee\u6539<\/h3>\n\n\n\n<p class=\"wp-block-paragraph\">\u6700\u540e\u7684\u6570\u636e\u6e05\u6d17\u6b65\u9aa4\u662f\u8fdb\u884c\u7279\u5f81\u4fee\u6539\uff0c\u5305\u62ec\uff1a<\/p>\n\n\n\n<ol class=\"wp-block-list\">\n<li>\u5c06\u00a0<code>median_house_value<\/code>\u00a0\u53d8\u91cf\u91cd\u65b0\u7f29\u653e\u5230\u4ee5\u4e07\u7f8e\u5143\u4e3a\u5355\u4f4d\uff08\u4e0e\u00a0<code>median_income<\/code>\u00a0\u76f8\u540c\u7684\u5c3a\u5ea6\uff09\u3002<\/li>\n\n\n\n<li>\u4e3a\u00a0<code>ocean_proximity<\/code>\u00a0\u7684\u7c7b\u522b\u751f\u6210\u865a\u62df\u53d8\u91cf\u3002<\/li>\n<\/ol>\n\n\n\n<p class=\"wp-block-paragraph\">\u9996\u5148\uff0c\u91cd\u65b0\u7f29\u653e&nbsp;<code>median_house_value<\/code>\uff1a<\/p>\n\n\n\n<pre class=\"wp-block-preformatted\">\/\/ Rescale median house value variable\nhousing_data[., \"median_house_value\"] = \n    housing_data[., \"median_house_value\"] .\/ 10000;<\/pre>\n\n\n\n<p class=\"wp-block-paragraph\">\u63a5\u4e0b\u6765\u4e3a&nbsp;<code>ocean_proximity<\/code>&nbsp;\u751f\u6210\u865a\u62df\u53d8\u91cf\u3002\u4f7f\u7528&nbsp;<code>frequency<\/code>&nbsp;\u8fc7\u7a0b\u4e86\u89e3\u5206\u7c7b\u6570\u636e\uff1a<\/p>\n\n\n\n<pre class=\"wp-block-preformatted\">\/\/ Check frequency of ocean_proximity categories\nfrequency(housing_data, \"ocean_proximity\");<\/pre>\n\n\n\n<pre class=\"wp-block-preformatted\">     Label      Count   Total %    Cum. 
%\n &lt;1H OCEAN       8095     44.89     44.89\n    INLAND       6136     34.03     78.93\n    ISLAND          2   0.01109     78.94\n  NEAR BAY       1525     8.458     87.39\nNEAR OCEAN       2273     12.61       100\n     Total      18031       100<\/pre>\n\n\n\n<p class=\"wp-block-paragraph\">\u4ece\u8868\u4e2d\u53ef\u4ee5\u770b\u51fa\uff0c<code>ISLAND<\/code>&nbsp;\u7c7b\u522b\u975e\u5e38\u5c0f\u3002\u5c06\u5176\u4ece\u5efa\u6a21\u6570\u636e\u96c6\u4e2d\u6392\u9664\u3002\u73b0\u5728\u4f7f\u7528&nbsp;<code>oneHot<\/code>&nbsp;\u8fc7\u7a0b\u521b\u5efa\u865a\u62df\u53d8\u91cf\uff1a<\/p>\n\n\n\n<pre class=\"wp-block-preformatted\">\/*\n** Generate dummy variables for \n** the ocean_proximity using one hot encoding\n*\/\ndummy_matrix = oneHot(housing_data[., \"ocean_proximity\"]);\n\n\/*\n** Build matrix of features\n** Note we exclude:\n**     - ISLAND dummy variable\n**     - Original ocean_proximity variable\n*\/\nmodel_data = delcols(housing_data, \"ocean_proximity\") ~ \n    delcols(dummy_matrix, \"ocean_proximity_ISLAND\");\n\n\/\/ Save data matrix\nsaved(model_data, load_path $+ \"model_data.gdat\");<\/pre>\n\n\n\n<h2 class=\"wp-block-heading\">\u6570\u636e\u5212\u5206<\/h2>\n\n\n\n<p class=\"wp-block-paragraph\">\u5728\u673a\u5668\u5b66\u4e60\u4e2d\uff0c\u901a\u5e38\u4f7f\u7528\u4e0d\u540c\u7684\u6570\u636e\u96c6\u6765\u62df\u5408\u6a21\u578b\u548c\u8bc4\u4f30\u6a21\u578b\u6027\u80fd\u3002\u7531\u4e8e\u673a\u5668\u5b66\u4e60\u6a21\u578b\u7684\u76ee\u6807\u662f\u4e3a\u672a\u89c1\u8fc7\u7684\u6570\u636e\u63d0\u4f9b\u9884\u6d4b\uff0c\u4f7f\u7528\u6d4b\u8bd5\u96c6\u53ef\u4ee5\u66f4\u771f\u5b9e\u5730\u8861\u91cf\u6a21\u578b\u7684\u8868\u73b0\u3002\u8981\u51c6\u5907\u8bad\u7ec3\u548c\u6d4b\u8bd5\u6570\u636e\uff0c\u6211\u4eec\u91c7\u53d6\u4e24\u4e2a\u6b65\u9aa4\uff1a<\/p>\n\n\n\n<ol 
class=\"wp-block-list\">\n<li>\u5206\u79bb\u76ee\u6807\u53d8\u91cf\u00a0<code>median_house_value<\/code>\u00a0\u548c\u7279\u5f81\u96c6\u3002<\/li>\n\n\n\n<li>\u4f7f\u7528\u00a0<code>trainTestSplit<\/code>\u00a0\u5c06\u6570\u636e\u5212\u5206\u4e3a 70% \u8bad\u7ec3\u96c6\u548c 30% \u6d4b\u8bd5\u96c6\u3002<\/li>\n<\/ol>\n\n\n\n<pre class=\"wp-block-preformatted\">new;\nlibrary gml;\nrndseed 896876;\n\n\/*\n** Load datafile\n*\/\nload_path = \"data\/\";\nfname = \"model_data.gdat\";\nhousing_data = loadd(load_path $+ fname);\n\n\/*\n** Feature management\n*\/\n\/\/ Separate dependent and independent data\ny = housing_data[., \"median_house_value\"];\nX = delcols(housing_data, \"median_house_value\");\n\n\/\/ Split into 70% training data and 30% testing data\n{ y_train, y_test, X_train, X_test } = trainTestSplit(y, X, 0.7);<\/pre>\n\n\n\n<h2 class=\"wp-block-heading\">\u62df\u5408\u6a21\u578b<\/h2>\n\n\n\n<p class=\"wp-block-paragraph\">\u5b8c\u6210\u6570\u636e\u6e05\u6d17\u540e\uff0c\u6211\u4eec\u7ec8\u4e8e\u51c6\u5907\u597d\u62df\u5408\u6a21\u578b\u4e86\u3002\u4eca\u5929\uff0c\u6211\u4eec\u5c06\u4f7f\u7528 LASSO \u56de\u5f52\u6a21\u578b\u6765\u9884\u6d4b\u76ee\u6807\u53d8\u91cf\u3002LASSO \u662f\u4e00\u79cd\u6b63\u5219\u5316\u5f62\u5f0f\uff0c\u5728\u7ecf\u6d4e\u548c\u91d1\u878d\u5efa\u6a21\u4e2d\u53d6\u5f97\u4e86\u76f8\u5bf9\u6210\u529f\u3002\u5b83\u4e3a\u5904\u7406\u7ebf\u6027\u6a21\u578b\u4e2d\u7684\u9ad8\u7ef4\u6027\u95ee\u9898\u63d0\u4f9b\u4e86\u6570\u636e\u9a71\u52a8\u7684\u65b9\u6cd5\u3002<\/p>\n\n\n\n<h3 class=\"wp-block-heading\">\u6a21\u578b\u62df\u5408<\/h3>\n\n\n\n<p class=\"wp-block-paragraph\">\u8981\u4f7f\u7528 LASSO \u6a21\u578b\u62df\u5408\u76ee\u6807\u53d8\u91cf&nbsp;<code>median_house_value<\/code>\uff0c\u6211\u4eec\u4f7f\u7528 GAUSS \u673a\u5668\u5b66\u4e60\u5e93\u4e2d\u7684&nbsp;<code>lassoFit<\/code>\u3002<\/p>\n\n\n\n<pre class=\"wp-block-preformatted\">\/*\n** LASSO Model\n*\/\n\/\/ Set lambda values\nlambda = { 0, 0.1, 0.3 };\n\n\/\/ Declare 
'mdl' to be an instance of a\n\/\/ lassoModel structure to hold the estimation results\nstruct lassoModel mdl;\n\n\/\/ Estimate the model with default settings\nmdl = lassoFit(y_train, X_train, lambda);<\/pre>\n\n\n\n<p class=\"wp-block-paragraph\"><code>lassoFit<\/code>&nbsp;\u8fc7\u7a0b\u4f1a\u6253\u5370\u6a21\u578b\u63cf\u8ff0\u548c\u7ed3\u679c\uff1a<\/p>\n\n\n\n<pre class=\"wp-block-preformatted\">==============================================================================\nModel:                        Lasso     Target Variable:    median_house_value\nNumber observations:          12622     Number features:                    12\n==============================================================================\n\n===========================================================\n                    Lambda          0        0.1        0.3\n===========================================================\n\n                 longitude     -2.347     -1.013   -0.02555\n                   latitude     -2.192    -0.9269          0\n         housing_median_age    0.07189    0.06384    0.03977\n                total_rooms  -0.001004          0          0\n             total_bedrooms    0.01165   0.006107   0.004828\n                 population  -0.004317  -0.003396  -0.001232\n                 households   0.006808   0.005119          0\n              median_income      3.872      3.569      3.457\n ocean_proximity__1H OCEAN     -5.509          0          0\n    ocean_proximity_INLAND     -9.437     -5.639     -6.575\n  ocean_proximity_NEAR BAY     -7.083    -0.6395          0\nocean_proximity_NEAR OCEAN     -5.198     0.6378     0.6981\n                    CONST.     
-193.5     -82.98      3.451\n===========================================================\n                        DF         12         10          7\n              Training MSE       33.7       34.7       37.4<\/pre>\n\n\n\n<p class=\"wp-block-paragraph\">\u7ed3\u679c\u7a81\u51fa\u4e86 LASSO \u7684\u53d8\u91cf\u9009\u62e9\u529f\u80fd\u3002\u5f53 \u03bb=0\uff08\u5b8c\u6574\u7684\u6700\u5c0f\u4e8c\u4e58\u6a21\u578b\uff09\u65f6\uff0c\u6240\u6709\u7279\u5f81\u90fd\u5305\u542b\u5728\u6a21\u578b\u4e2d\u3002\u5f53 \u03bb=0.3 \u65f6\uff0cLASSO \u56de\u5f52\u5220\u9664\u4e86 12 \u4e2a\u53d8\u91cf\u4e2d\u7684 5 \u4e2a\uff1a<code>latitude<\/code>\u3001<code>total_rooms<\/code>\u3001<code>households<\/code>\u3001<code>ocean_proximity__1H OCEAN<\/code>\u3001<code>ocean_proximity_NEAR BAY<\/code>\u3002\u6b63\u5982\u9884\u671f\uff0c<code>median_income<\/code>&nbsp;\u6709\u8f83\u5927\u7684\u6b63\u5411\u5f71\u54cd\u3002<\/p>\n\n\n\n<h3 class=\"wp-block-heading\">\u9884\u6d4b<\/h3>\n\n\n\n<p class=\"wp-block-paragraph\">\u6211\u4eec\u73b0\u5728\u53ef\u4ee5\u4f7f\u7528&nbsp;<code>lmPredict<\/code>&nbsp;\u6765\u6d4b\u8bd5\u6a21\u578b\u5728\u6d4b\u8bd5\u6570\u636e\u4e0a\u7684\u9884\u6d4b\u80fd\u529b\uff1a<\/p>\n\n\n\n<pre class=\"wp-block-preformatted\">\/\/ Predictions\npredictions = lmPredict(mdl, X_test);\n\n\/\/ Get MSE\ntesting_MSE = meanSquaredError(predictions, y_test);\nprint \"Testing MSE\"; testing_MSE;<\/pre>\n\n\n\n<pre class=\"wp-block-preformatted\">Testing MSE\n\n       33.814993\n       34.726144\n       37.199771<\/pre>\n\n\n\n<p class=\"wp-block-paragraph\">\u6b63\u5982\u9884\u671f\uff0c\u8fd9\u4e9b\u503c\u5927\u591a\u9ad8\u4e8e\u8bad\u7ec3 MSE\uff0c\u4f46\u76f8\u5dee\u4e0d\u5927\u3002\u03bb \u503c\u6700\u9ad8\u7684\u6a21\u578b\u7684\u6d4b\u8bd5 MSE \u5b9e\u9645\u4e0a\u4f4e\u4e8e\u8bad\u7ec3 MSE\uff0c\u8fd9\u8868\u660e\u6211\u4eec\u7684\u6a21\u578b\u6ca1\u6709\u8fc7\u62df\u5408\u3002<\/p>\n\n\n\n<h2 class=\"wp-block-heading\">\u7279\u5f81\u5de5\u7a0b<\/h2>\n\n\n\n<p 
class=\"wp-block-paragraph\">\u7531\u4e8e\u6a21\u578b\u6ca1\u6709\u8fc7\u62df\u5408\uff0c\u6211\u4eec\u53ef\u4ee5\u5411\u6a21\u578b\u6dfb\u52a0\u66f4\u591a\u53d8\u91cf\u3002\u6211\u4eec\u4ece\u5f53\u524d\u7279\u5f81\u7684\u7ec4\u5408\u4e2d\u521b\u5efa\u4e00\u4e9b\u65b0\u7279\u5f81\u3002\u8fd9\u90e8\u5206\u8fc7\u7a0b\u79f0\u4e3a\u7279\u5f81\u5de5\u7a0b\uff0c\u53ef\u4ee5\u5bf9\u673a\u5668\u5b66\u4e60\u6a21\u578b\u505a\u51fa\u91cd\u5927\u8d21\u732e\u3002<\/p>\n\n\n\n<p class=\"wp-block-paragraph\">\u9996\u5148\u4e3a&nbsp;<code>total_rooms<\/code>\u3001<code>total_bedrooms<\/code>&nbsp;\u548c&nbsp;<code>households<\/code>&nbsp;\u751f\u6210\u4eba\u5747\u53d8\u91cf\u3002<\/p>\n\n\n\n<pre class=\"wp-block-preformatted\">\/*\n** Create per capita variables using population\n*\/\npc_data = housing_data[., \"total_rooms\" \"total_bedrooms\" \"households\"] \n    .\/ housing_data[., \"population\"];\n\n\/\/ Convert to a dataframe and add variable names\npc_data = asdf(pc_data, \"rooms_pc\"$|\"bedrooms_pc\"$|\"households_pc\");<\/pre>\n\n\n\n<p class=\"wp-block-paragraph\">\u63a5\u4e0b\u6765\u521b\u5efa\u4e00\u4e2a\u8868\u793a&nbsp;<code>total_bedrooms<\/code>&nbsp;\u5360&nbsp;<code>total_rooms<\/code>&nbsp;\u767e\u5206\u6bd4\u7684\u53d8\u91cf\uff1a<\/p>\n\n\n\n<pre class=\"wp-block-preformatted\">beds_per_room = X[.,\"total_bedrooms\"] .\/ X[.,\"total_rooms\"];\nX = X ~ pc_data ~ asdf(beds_per_room, \"beds_per_room\");<\/pre>\n\n\n\n<h3 class=\"wp-block-heading\">\u62df\u5408\u4e0e\u9884\u6d4b\u65b0\u6a21\u578b<\/h3>\n\n\n\n<pre class=\"wp-block-preformatted\">\/\/ Reset the random seed so we get the\n\/\/ same test and train splits as our previous model\nrndseed 896876;\n\n\/\/ Split our new X into train and test splits\n{ y_train, y_test, X_train, X_test } = trainTestSplit(y, X, 0.7);\n\n\/\/ Set lambda values\nlambda = { 0, 0.1, 0.3 };\n\n\/\/ Declare 'mdl' to be an instance of a\n\/\/ lassoModel structure to hold the estimation results\nstruct lassoModel mdl;\n\n\/\/ 
Estimate the model with default settings\nmdl = lassoFit(y_train, X_train, lambda);\n\n\/\/ Predictions\npredictions = lmPredict(mdl, X_test);\n\n\/\/ Get MSE\ntesting_MSE = meanSquaredError(predictions, y_test);\nprint \"Testing MSE\"; testing_MSE;<\/pre>\n\n\n\n<pre class=\"wp-block-preformatted\">==============================================================================\nModel:                        Lasso     Target Variable:    median_house_value\nNumber observations:          12622     Number features:                    16\n==============================================================================\n\n===========================================================\n                    Lambda          0        0.1        0.3\n===========================================================\n\n                 longitude     -2.495     -1.008          0\n                   latitude      -2.36    -0.9354          0\n         housing_median_age     0.0808    0.07167    0.04316\n                total_rooms -0.0001714          0          0\n             total_bedrooms   0.005301   0.001517  0.0008104\n                 population -0.0004661          0          0\n                 households  -0.001611          0          0\n              median_income      3.947      4.011      3.675\n ocean_proximity__1H OCEAN     -5.171          0          0\n    ocean_proximity_INLAND     -8.635     -4.963     -6.235\n  ocean_proximity_NEAR BAY     -6.966     -0.875          0\nocean_proximity_NEAR OCEAN     -5.219     0.2927     0.1798\n                  rooms_pc      2.678     0.1104          0\n               bedrooms_pc     -11.68          0          0\n             households_pc      22.23      21.47      20.23\n             beds_per_room      33.03      17.03      8.029\n                    CONST.     
-221.9     -95.55     -3.059\n===========================================================\n                        DF         16         11          7\n              Training MSE       31.6       32.5       34.3\nTesting MSE\n\n       31.505169\n       32.457936\n       34.155290<\/pre>\n\n\n\n<p class=\"wp-block-paragraph\">\u5728\u6240\u6709\u7684 \u03bb \u503c\u4e0b\uff0c\u8bad\u7ec3\u548c\u6d4b\u8bd5 MSE \u90fd\u6709\u6240\u6539\u5584\u3002\u5728\u65b0\u53d8\u91cf\u4e2d\uff0c<code>households_pc<\/code>&nbsp;\u548c&nbsp;<code>beds_per_room<\/code>&nbsp;\u4f3c\u4e4e\u5177\u6709\u6700\u5f3a\u7684\u5f71\u54cd\u3002<\/p>\n\n\n\n<h2 class=\"wp-block-heading\">\u6269\u5c55\u4e0e\u5ef6\u4f38<\/h2>\n\n\n\n<p class=\"wp-block-paragraph\">\u6211\u4eec\u4f7f\u7528\u4e86\u7ebf\u6027\u56de\u5f52\u6a21\u578b LASSO \u6765\u5efa\u6a21\u623f\u5c4b\u4ef7\u503c\u3002\u8fd9\u4e2a\u9009\u62e9\u5e26\u6709\u4e00\u5b9a\u7684\u968f\u610f\u6027\uff0c\u8fd8\u6709\u8bb8\u591a\u66ff\u4ee3\u65b9\u6cd5\u548c\u6269\u5c55\u53ef\u4ee5\u5e2e\u52a9\u6539\u8fdb\u9884\u6d4b\u3002\u4f8b\u5982\uff1a<\/p>\n\n\n\n<ul class=\"wp-block-list\">\n<li>\u4f7f\u7528<a href=\"https:\/\/docs.aptech.com\/gauss\/kmeansfit.html\" target=\"_blank\" rel=\"noreferrer noopener\">\u805a\u7c7b<\/a>\u6216\u00a0<a href=\"https:\/\/docs.aptech.com\/gauss\/knnfit.html\" target=\"_blank\" rel=\"noreferrer noopener\">K \u8fd1\u90bb<\/a>\u6765\u6355\u6349\u66f4\u591a\u4f4d\u7f6e\u4fe1\u606f\u3002<\/li>\n\n\n\n<li>\u4f7f\u7528<a href=\"https:\/\/www.aptech.com\/blog\/applications-of-principal-components-analysis-in-finance\/\" target=\"_blank\" rel=\"noreferrer noopener\">\u4e3b\u6210\u5206\u5206\u6790<\/a>\u6355\u6349\u7279\u5f81\u7684\u53d8\u5f02\uff0c\u7136\u540e\u4f30\u8ba1\u4e2d\u4f4d\u623f\u5c4b\u4ef7\u503c\u4e0e\u4e3b\u6210\u5206\u4e4b\u95f4\u7684\u7ebf\u6027\u5173\u7cfb\u3002<\/li>\n\n\n\n<li>\u4f7f\u7528\u00a0<a href=\"https:\/\/docs.aptech.com\/gauss\/decforestrfit.html\" target=\"_blank\" rel=\"noreferrer
noopener\">\u968f\u673a\u68ee\u6797\u6a21\u578b<\/a>\uff0c\u8fd9\u5bf9\u4e8e\u8868\u683c\u6570\u636e\u96c6\u901a\u5e38\u80fd\u63d0\u4f9b\u5f88\u597d\u7684\u51c6\u786e\u6027\u3002<\/li>\n\n\n\n<li>\u5c06\u623f\u5c4b\u4ef7\u503c\u5206\u6210\u591a\u4e2a\u533a\u95f4\uff0c\u8fdb\u884c<a href=\"https:\/\/www.aptech.com\/blog\/predicting-recessions-with-machine-learning-techniques\/\" target=\"_blank\" rel=\"noreferrer noopener\">\u5206\u7c7b<\/a>\u800c\u975e\u56de\u5f52\u3002<\/li>\n<\/ul>\n\n\n\n<h3 class=\"wp-block-heading\">\u7ed3\u8bba<\/h3>\n\n\n\n<p class=\"wp-block-paragraph\">\u5728\u4eca\u5929\u7684\u535a\u5ba2\u4e2d\uff0c\u6211\u4eec\u770b\u5230\u4e86\u6570\u636e\u63a2\u7d22\u548c\u6e05\u6d17\u5728\u5f00\u53d1\u673a\u5668\u5b66\u4e60\u6a21\u578b\u4e2d\u7684\u91cd\u8981\u4f5c\u7528\u3002\u6211\u4eec\u5f88\u5c11\u80fd\u83b7\u5f97\u53ef\u4ee5\u76f4\u63a5\u8f93\u5165\u6a21\u578b\u7684\u6570\u636e\u3002\u6700\u4f73\u5b9e\u8df5\u662f\u82b1\u65f6\u95f4\u8fdb\u884c\u6570\u636e\u63a2\u7d22\u548c\u6e05\u6d17\uff0c\u56e0\u4e3a\u4efb\u4f55\u673a\u5668\u5b66\u4e60\u6a21\u578b\u7684\u53ef\u9760\u6027\u90fd\u53d6\u51b3\u4e8e\u5176\u6570\u636e\u7684\u8d28\u91cf\u3002<\/p>\n\n\n\n<h3 class=\"wp-block-heading\">\u5ef6\u4f38\u9605\u8bfb<\/h3>\n\n\n\n<ol class=\"wp-block-list\">\n<li><a href=\"https:\/\/www.aptech.com\/blog\/predicting-recessions-with-machine-learning-techniques\/\" target=\"_blank\" rel=\"noreferrer noopener\">\u4f7f\u7528\u673a\u5668\u5b66\u4e60\u6280\u672f\u9884\u6d4b\u7ecf\u6d4e\u8870\u9000<\/a><\/li>\n\n\n\n<li><a href=\"https:\/\/www.aptech.com\/blog\/applications-of-principal-components-analysis-in-finance\/\" target=\"_blank\" rel=\"noreferrer noopener\">\u4e3b\u6210\u5206\u5206\u6790\u5728\u91d1\u878d\u4e2d\u7684\u5e94\u7528<\/a><\/li>\n\n\n\n<li><a href=\"https:\/\/www.aptech.com\/blog\/predicting-the-output-gap-with-machine-learning-regression-models\/\" target=\"_blank\" rel=\"noreferrer 
noopener\">\u4f7f\u7528\u673a\u5668\u5b66\u4e60\u56de\u5f52\u6a21\u578b\u9884\u6d4b\u4ea7\u51fa\u7f3a\u53e3<\/a><\/li>\n\n\n\n<li><a href=\"https:\/\/www.aptech.com\/blog\/fundamentals-of-tuning-machine-learning-hyperparameters\/\" target=\"_blank\" rel=\"noreferrer noopener\">\u673a\u5668\u5b66\u4e60\u8d85\u53c2\u6570\u8c03\u4f18\u57fa\u7840<\/a><\/li>\n\n\n\n<li><a href=\"https:\/\/www.aptech.com\/blog\/understanding-cross-validation\/\" target=\"_blank\" rel=\"noreferrer noopener\">\u7406\u89e3\u4ea4\u53c9\u9a8c\u8bc1<\/a><\/li>\n\n\n\n<li><a href=\"https:\/\/www.aptech.com\/blog\/classification-with-regularized-logistic-regression\/\" target=\"_blank\" rel=\"noreferrer noopener\">\u57fa\u4e8e\u6b63\u5219\u5316\u903b\u8f91\u56de\u5f52\u7684\u5206\u7c7b<\/a><\/li>\n<\/ol>\n","protected":false},"excerpt":{"rendered":"<p>\u4f5c\u8005\uff1aEric \u00b7 \u53d1\u8868\u4e8e 20&#46;&#46;&#46;<\/p>\n","protected":false},"author":2,"featured_media":0,"comment_status":"closed","ping_status":"open","sticky":false,"template":"","format":"standard","meta":{"footnotes":""},"categories":[4],"tags":[],"class_list":["post-1807","post","type-post","status-publish","format-standard","hentry","category-softapp"],"_links":{"self":[{"href":"https:\/\/cabit.top\/index.php?rest_route=\/wp\/v2\/posts\/1807","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/cabit.top\/index.php?rest_route=\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/cabit.top\/index.php?rest_route=\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"https:\/\/cabit.top\/index.php?rest_route=\/wp\/v2\/users\/2"}],"replies":[{"embeddable":true,"href":"https:\/\/cabit.top\/index.php?rest_route=%2Fwp%2Fv2%2Fcomments&post=1807"}],"version-history":[{"count":2,"href":"https:\/\/cabit.top\/index.php?rest_route=\/wp\/v2\/posts\/1807\/revisions"}],"predecessor-version":[{"id":1813,"href":"https:\/\/cabit.top\/index.php?rest_route=\/wp\/v2\/posts\/1807\/revisions\/1813"}],"wp:attachment":[{"href":"https:\/
\/cabit.top\/index.php?rest_route=%2Fwp%2Fv2%2Fmedia&parent=1807"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/cabit.top\/index.php?rest_route=%2Fwp%2Fv2%2Fcategories&post=1807"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/cabit.top\/index.php?rest_route=%2Fwp%2Fv2%2Ftags&post=1807"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}