Войти
или
Зарегистрироваться
Курсы
Учебник
Учебник 2.0
ОГЭ/ЕГЭ
Олимпиады
Рубрикатор
Компилятор
Статья Автор:
Анна Горбачева
Горбачева Анна Самматив
# Exploratory analysis of the extended Iris data set (Google Colab notebook).
#
# Steps: load the CSV from Google Drive, drop IQR outliers from every float
# column, plot how each species is distributed over soil types, and plot
# trichome density against leaf area with a least-squares trend line.

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from google.colab import drive


# Order of the groups (soil types) and of the bars inside each group.
SOIL_ORDER = ['clay', 'sandy', 'loamy']
SPECIES_ORDER = ['virginica', 'setosa', 'versicolor']


def drop_iqr_outliers(frame, whisker=1.5):
    """Return *frame* without rows outside the IQR fences of its float columns.

    Columns are processed in order and the fences of each column are computed
    on the rows that survived the previous columns, so the result depends on
    column order. For every processed column the column name and its upper
    and lower fence are printed.

    Args:
        frame: DataFrame to filter; it is not modified.
        whisker: Multiplier applied to the IQR to obtain the fences.

    Returns:
        A DataFrame containing only the rows that lie inside every fence.
    """
    kept = frame
    for col in kept.columns:
        if not pd.api.types.is_float_dtype(kept[col]):
            continue
        q1 = kept[col].quantile(0.25)
        q3 = kept[col].quantile(0.75)
        iqr = q3 - q1
        up = q3 + whisker * iqr
        down = q1 - whisker * iqr
        print(col, up, down)
        # Inclusive on both ends, i.e. down <= value <= up.
        kept = kept[kept[col].between(down, up)]
    return kept


def soil_share_by_species(frame, species_order, soil_order):
    """Return, per species, the percentage of its rows found on each soil.

    Args:
        frame: DataFrame with ``species`` and ``soil_type`` columns.
        species_order: Species labels used as the row index of the result.
        soil_order: Soil labels used as the columns of the result.

    Returns:
        A DataFrame (species x soil) of percentages. Combinations that do not
        occur in *frame* are 0 instead of raising ``KeyError``.
    """
    per_species = frame.species.value_counts().reindex(species_order)
    table = pd.crosstab(frame.species, frame.soil_type).reindex(
        index=species_order, columns=soil_order, fill_value=0
    )
    # A species with no rows at all gives 0 / NaN; report it as 0 %.
    return (table.div(per_species, axis=0) * 100).fillna(0.0)


# --- Load the data ----------------------------------------------------------

drive.mount('/content/drive')
# Output: Mounted at /content/drive

df = pd.read_csv('drive/MyDrive/iris_extended.csv')
# Rendered by the notebook only when this is the last expression of a cell.
df.sample(10)
# Output of one run (random sample, middle columns elided by pandas):
#      species elevation soil_type sepal_length sepal_width petal_length petal_width sepal_area petal_area sepal_aspect_ratio ... sepal_to_petal_length_ratio sepal_to_petal_width_ratio sepal_petal_length_diff sepal_petal_width_diff petal_curvature_mm petal_texture_trichomes_per_mm2 leaf_area_cm2 sepal_area_sqrt petal_area_sqrt area_ratios
# 931 virginica 251.5 loamy 6.70 3.33 4.63 1.94 22.3110 8.9822 2.012012 ... 1.447084 1.716495 2.07 1.39 7.96 10.80 56.34 4.723452 2.997032 2.483913
# 132 setosa 150.2 clay 5.18 3.11 1.56 0.33 16.1098 0.5148 1.665595 ... 3.320513 9.424242 3.62 2.78 5.37 22.10 59.84 4.013702 0.717496 31.293318
# 353 setosa 178.8 clay 4.71 4.20 1.53 0.58 19.7820 0.8874 1.121429 ... 3.078431 7.241379 3.18 3.62 5.05 22.61 57.08 4.447696 0.942019 22.292089
# 376 setosa 203.9 clay 5.14 4.13 1.52 0.31 21.2282 0.4712 1.244552 ... 3.381579 13.322581 3.62 3.82 5.49 18.79 51.04 4.607407 0.686440 45.051358
# 329 setosa 285.2 sandy 4.68 2.95 1.23 0.20 13.8060 0.2460 1.586441 ... 3.804878 14.750000 3.45 2.75 4.45 19.58 46.68 3.715643 0.495984 56.121951
# 154 setosa 144.7 clay 5.43 3.83 1.59 0.38 20.7969 0.6042 1.417755 ... 3.415094 10.078947 3.84 3.45 4.58 18.94 54.00 4.560362 0.777303 34.420556
# 957 virginica 162.2 loamy 6.34 2.84 6.15 2.22 18.0056 13.6530 2.232394 ... 1.030894 1.279279 0.19 0.62 13.44 10.58 72.82 4.243301 3.694997 1.318802
# 355 setosa 190.5 loamy 4.71 3.09 1.42 0.34 14.5539 0.4828 1.524272 ... 3.316901 9.088235 3.29 2.75 4.30 22.60 40.25 3.814957 0.694838 30.144780
# 263 setosa 256.1 loamy 4.76 3.17 1.62 0.37 15.0892 0.5994 1.501577 ... 2.938272 8.567568 3.14 2.80 5.30 19.91 58.38 3.884482 0.774209 25.173841
# 303 setosa 111.7 sandy 5.17 3.33 1.50 0.36 17.2161 0.5400 1.552553 ... 3.446667 9.250000 3.67 2.97 5.22 19.83 57.71 4.149229 0.734847 31.881667

# --- Remove outliers --------------------------------------------------------

df2 = drop_iqr_outliers(df)
print('ejection_count', len(df.index) - len(df2.index))

# --- Species distribution over soil types (grouped bar chart) ---------------

soil_share = soil_share_by_species(df2, SPECIES_ORDER, SOIL_ORDER)

bar_width = 0.25
# (species key, bar colour, legend label); one bar per species in each group.
bar_styles = [
    ('virginica', 'black', 'Вирджиния'),
    ('setosa', 'orange', 'Сетоза'),
    ('versicolor', 'grey', 'Версиколор'),
]

# plt.subplots returns a (figure, axes) pair, so unpack it.
fig, ax = plt.subplots(figsize=(12, 8))
group_positions = np.arange(len(SOIL_ORDER))
for offset, (species, color, label) in enumerate(bar_styles):
    ax.bar(
        group_positions + offset * bar_width,
        soil_share.loc[species],
        color=color,
        width=bar_width,
        edgecolor='white',
        label=label,
    )

ax.set_xlabel('Почва обнаружения', fontsize=15)
ax.set_ylabel('Сорта ириса, проценты', fontsize=15)
# Centre the tick under the middle bar of each group of three.
ax.set_xticks(group_positions + bar_width)
ax.set_xticklabels(['Глинистая почва', 'Песчаная почва', 'Суглинистая почва'])
ax.legend()
plt.show()

# Rendered by the notebook only when this is the last expression of a cell.
df2.sample(20)

# --- Trichome density versus leaf area --------------------------------------

y = df2['petal_texture_trichomes_per_mm2']
x = df2['leaf_area_cm2']
correlation = y.corr(x)
print('correlation', correlation)

# Fit the trend line once and evaluate it on the sorted unique x values.
trend = np.poly1d(np.polyfit(x, y, 1))
x_sorted = np.unique(x)

plt.scatter(x, y, s=1, c='b')
plt.plot(x_sorted, trend(x_sorted), color='red')
plt.xlabel('Площадь листа в квадратных сантиметрах')
plt.ylabel('Кол-во трихом на квадратный миллиметр')
plt.show()
×
Прикрепленные файлы
Горбачева Самматив.csv
Горбачева Самматив.docx
Чтобы оставить комментарий, нужна авторизация
Печать