Quantcast
Channel: Active questions tagged r - Stack Overflow
Viewing all articles
Browse latest Browse all 201977

Subsetting my data is not yielding the expected results

$
0
0

I have a dataset of 93 observations. There are only two variables, a factor (size, a number) and it's response (percent, also a number). The factor value ranges from 0-2000. I would like to combine these 93 observations into three groups based on factor values (0-2, 2-50, and 50-2000) and see the total combined response value for each.

Here is my data:

> data2
   run  size percentage
1    1 0.375      0.010
2    2 0.412      0.020
3    3 0.452      0.032
4    4 0.496      0.043
5    5 0.545      0.053
6    6 0.598      0.060
7    7 0.656      0.066
8    8 0.721      0.070
9    9 0.791      0.071
10  10 0.868      0.072
11  11 0.953      0.070
12  12 1.047      0.069
13  13 1.149      0.067
14  14 1.261      0.065
15  15 1.385      0.065
16  16 1.520      0.066
17  17 1.668      0.068
18  18 1.832      0.072
19  19 2.011      0.077
20  20 2.207      0.083
21  21 2.423      0.090
22  22 2.660      0.097
23  23 2.920       0.10
24  24 3.205       0.11
25  25 3.519       0.12
26  26 3.863       0.13
27  27 4.240       0.13
28  28 4.655       0.14
29  29 5.110       0.14
30  30 5.610       0.14
31  31 6.158       0.14
32  32 6.760       0.14
33  33 7.421       0.15
34  34 8.147       0.15
35  35 8.943       0.15
36  36 9.817       0.16
37  37 10.78       0.18
38  38 11.83       0.19
39  39 12.99       0.21
40  40 14.26       0.23
41  41 15.65       0.24
42  42 17.18       0.25
43  43 18.86       0.27
44  44 20.70       0.28
45  45 22.73       0.30
46  46 24.95       0.30
47  47 27.39       0.29
48  48 30.07       0.27
49  49 33.01       0.23
50  50 36.24       0.21
51  51 39.78       0.20
52  52 43.67       0.21
53  53 47.94       0.22
54  54 52.62       0.19
55  55 57.77       0.13
56  56 63.41      0.070
57  57 69.61      0.055
58  58 76.42      0.087
59  59 83.89       0.14
60  60 92.09       0.17
61  61 101.1       0.17
62  62 111.0       0.18
63  63 121.8       0.27
64  64 133.7       0.43
65  65 146.8       0.64
66  66 161.2       0.88
67  67 176.9       1.16
68  68 194.2       1.51
69  69 213.2       1.94
70  70 234.1       2.47
71  71 256.9       3.16
72  72 282.1       4.03
73  73 309.6       5.02
74  74 339.9       6.05
75  75 373.1       6.96
76  76 409.6       7.63
77  77 449.7       8.01
78  78 493.6       8.08
79  79 541.9       7.82
80  80 594.9       7.13
81  81 653.0       6.01
82  82 716.8       4.81
83  83 786.9       3.57
84  84 863.9       2.09
85  85 948.3       1.01
86  86  1041       0.55
87  87  1143       0.22
88  88  1255      0.018
89  89  1377          0
90  90  1512          0
91  91  1660          0
92  92  1822          0
93      2000    

Here is a output of dput

dput(data2)
structure(list(run = c("1", "2", "3", "4", "5", "6", "7", "8", 
"9", "10", "11", "12", "13", "14", "15", "16", "17", "18", "19", 
"20", "21", "22", "23", "24", "25", "26", "27", "28", "29", "30", 
"31", "32", "33", "34", "35", "36", "37", "38", "39", "40", "41", 
"42", "43", "44", "45", "46", "47", "48", "49", "50", "51", "52", 
"53", "54", "55", "56", "57", "58", "59", "60", "61", "62", "63", 
"64", "65", "66", "67", "68", "69", "70", "71", "72", "73", "74", 
"75", "76", "77", "78", "79", "80", "81", "82", "83", "84", "85", 
"86", "87", "88", "89", "90", "91", "92", ""), size = c("0.375", 
"0.412", "0.452", "0.496", "0.545", "0.598", "0.656", "0.721", 
"0.791", "0.868", "0.953", "1.047", "1.149", "1.261", "1.385", 
"1.520", "1.668", "1.832", "2.011", "2.207", "2.423", "2.660", 
"2.920", "3.205", "3.519", "3.863", "4.240", "4.655", "5.110", 
"5.610", "6.158", "6.760", "7.421", "8.147", "8.943", "9.817", 
"10.78", "11.83", "12.99", "14.26", "15.65", "17.18", "18.86", 
"20.70", "22.73", "24.95", "27.39", "30.07", "33.01", "36.24", 
"39.78", "43.67", "47.94", "52.62", "57.77", "63.41", "69.61", 
"76.42", "83.89", "92.09", "101.1", "111.0", "121.8", "133.7", 
"146.8", "161.2", "176.9", "194.2", "213.2", "234.1", "256.9", 
"282.1", "309.6", "339.9", "373.1", "409.6", "449.7", "493.6", 
"541.9", "594.9", "653.0", "716.8", "786.9", "863.9", "948.3", 
"1041", "1143", "1255", "1377", "1512", "1660", "1822", "2000"
), percentage = c("0.013", "0.023", "0.034", "0.049", "0.061", 
"0.072", "0.083", "0.093", "0.10", "0.11", "0.12", "0.12", "0.13", 
"0.14", "0.14", "0.15", "0.15", "0.16", "0.17", "0.17", "0.18", 
"0.19", "0.20", "0.21", "0.22", "0.24", "0.25", "0.26", "0.28", 
"0.30", "0.31", "0.33", "0.35", "0.37", "0.39", "0.42", "0.45", 
"0.47", "0.50", "0.53", "0.56", "0.58", "0.59", "0.59", "0.58", 
"0.55", "0.52", "0.49", "0.46", "0.45", "0.45", "0.45", "0.44", 
"0.42", "0.38", "0.35", "0.32", "0.31", "0.33", "0.36", "0.42", 
"0.49", "0.59", "0.74", "0.94", "1.19", "1.49", "1.82", "2.18", 
"2.55", "2.94", "3.34", "3.78", "4.25", "4.73", "5.20", "5.60", 
"5.87", "5.93", "5.77", "5.37", "4.77", "4.03", "3.21", "2.36", 
"1.55", "0.81", "0.30", "0.056", "0.0044", "0", "0", "")), class = "data.frame", row.names = c(NA, 
-93L))

I have tried the following code, which I would think should give me the results that I'd like:

clay <- data2 %>% filter(size <= 2)
silt <- data2 %>% filter(size > 2 & size <= 50)
sand <- data2 %>% filter(size > 50 & size <= 2000)
sum(as.numeric(clay$percentage), na.rm=TRUE)
[1] 8.637
sum(as.numeric(silt$percentage), na.rm=TRUE)
[1] 57.217
sum(as.numeric(sand$percentage), na.rm=TRUE)
[1] 0

However, as you can see, the totals do not add up to what they should when looking at the dataset.

Any idea what I am doing wrong here?

Thanks in advance for any help


Viewing all articles
Browse latest Browse all 201977

Trending Articles



<script src="https://jsc.adskeeper.com/r/s/rssing.com.1596347.js" async> </script>