Quantcast
Channel: Active questions tagged r - Stack Overflow
Viewing all articles
Browse latest Browse all 204922

table join with exact and "nearest" matches using data.table

$
0
0

I want to join two data.tables by requiring exact matches on some key columns and a nearest match (roll="nearest") on another. The example below uses a cartesian join, which doesn't work for bigger tables. Can I join on (name, site, time) with match types (exact, exact, "nearest")?

#toy example
library(data.table)
library(lubridate)

# Build two small, reproducible tables to join. Each row has a person `name`,
# a `site` letter, and a timestamp placed a random number of minutes after
# 2019-01-01 00:00 (`time1` in df1, `time2` in df2).
# The sample() calls are kept in their original order so the random stream
# seeded below is consumed exactly as before.
set.seed(1)
df1 <- data.table(
  # TRUE is spelled out: `T` is an ordinary variable that can be reassigned.
  name = sample(c("Andy", "Beth", "Chris"), 10, replace = TRUE),
  site = sample(LETTERS[1:3], 10, replace = TRUE),
  time1 = ymd_hm("2019-01-01 00:00") + dminutes(sample(seq(1, 1e5), 10))
)
df2 <- data.table(
  name = sample(c("Andy", "Beth", "Chris"), 10, replace = TRUE),
  site = sample(LETTERS[1:3], 10, replace = TRUE),
  time2 = ymd_hm("2019-01-01 00:00") + dminutes(sample(seq(1, 1e5), 10))
)
> df1
     name site               time1
 1:  Andy    A 2019-03-06 21:51:00
 2:  Beth    A 2019-01-15 17:35:00
 3:  Beth    C 2019-02-15 06:07:00
 4: Chris    B 2019-01-09 17:16:00
 5:  Andy    C 2019-01-19 13:21:00
 6: Chris    B 2019-01-27 19:30:00
 7: Chris    C 2019-01-01 22:19:00
 8:  Beth    C 2019-01-27 13:17:00
 9:  Beth    B 2019-03-02 09:23:00
10:  Andy    C 2019-01-24 15:12:00
> df2
     name site               time2
 1:  Beth    C 2019-02-03 04:02:00
 2:  Beth    B 2019-03-01 19:21:00
 3:  Beth    C 2019-01-31 10:09:00
 4:  Andy    B 2019-01-17 23:59:00
 5: Chris    B 2019-01-05 21:48:00
 6: Chris    C 2019-01-07 21:47:00
 7: Chris    A 2019-01-22 23:06:00
 8:  Andy    B 2019-02-06 00:20:00
 9: Chris    C 2019-02-15 23:16:00
10:  Beth    C 2019-01-29 06:00:00

# Exact match on (name, site), nearest match on time.
# Steps, one per bracket in the chain:
#   1. inner join df1 with df2 on name and site (unmatched rows are dropped),
#   2. add a helper column `diff` holding the absolute gap between the times,
#   3. sort by name, site and gap, then keep the first row of each
#      (name, site) group, i.e. the pair with the smallest gap,
#   4. drop the helper column again.
df3 <- df1[df2, on = .(name, site), nomatch = NULL][
  , diff := abs(time1 - time2)
][
  order(name, site, diff), .SD[1], by = .(name, site)
][
  , diff := NULL
]
> df3
    name site               time1               time2
1:  Beth    B 2019-03-02 09:23:00 2019-03-01 19:21:00
2:  Beth    C 2019-01-27 13:17:00 2019-01-29 06:00:00
3: Chris    B 2019-01-09 17:16:00 2019-01-05 21:48:00
4: Chris    C 2019-01-01 22:19:00 2019-01-07 21:47:00

Viewing all articles
Browse latest Browse all 204922

Trending Articles



<script src="https://jsc.adskeeper.com/r/s/rssing.com.1596347.js" async> </script>