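Bazı SAS programlarını R'ye geçirme üzerinde çalışıyorum ve bu basit eşleştirmeli birleştirme (match merge) mantığını gerçekleştirmenin en iyi yolunu bulmakta zorlanıyorum. Veri setleri milyonlarca satırdan oluşup yalnızca birkaç bin satır eşleşebileceğinden, aslında sadece xx ve yy'nin eşleştiği satırları istiyorum.
Başlık: R'de Eşleştirmeli Birleştirme (Match Merge)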
proc sort data=df1; by cridsessid mode refresh_key; run;
proc sort data=df2; by cridsessid mode refresh_key; run;
data df3;
merge df1(in=xx) df2(in=yy);
by cridsessid mode refresh_key;
if xx and yy then do;
cridsessid=catx(':',cridsessid,refresh_key2);
end;
run;;
Veri setlerini olduğu gibi vermek zor, ancak temel çıktı şuna benzer:
df1 ve df1'e ait dput() çıktısı:
transId_app mode sm_bdt sm_edt
1 c3bca1af-ed0d-4403-9552-29758055f7a3 None 21MAR2016:07:07:56.611 21MAR2016:07:07:56.627
2 68f85148-6b75-49dc-90f9-5bb66b6a750b None 21MAR2016:07:07:56.940 21MAR2016:07:07:56.940
3 68f85148-6b75-49dc-90f9-5bb66b6a750b None 21MAR2016:07:07:56.940 21MAR2016:07:07:56.955
4 68f85148-6b75-49dc-90f9-5bb66b6a750b None 21MAR2016:07:07:56.940 21MAR2016:07:07:56.940
5 68f85148-6b75-49dc-90f9-5bb66b6a750b None 21MAR2016:07:07:56.924 21MAR2016:07:07:56.940
6 7a0c53a3-00b4-4b81-8238-24a738e5f4ed None 21MAR2016:07:08:33.003 21MAR2016:07:08:33.003
mode_ cridsessid
1 Pass1 0043dfb7-9a98-4b7c-9c04-5afc05580843:96530555-3568-468c-9bb4-ddd77278f1b2
2 Pass1 0043dfb7-9a98-4b7c-9c04-5afc05580843:96530555-3568-468c-9bb4-ddd77278f1b2
3 Pass1 0043dfb7-9a98-4b7c-9c04-5afc05580843:96530555-3568-468c-9bb4-ddd77278f1b2
4 Pass1 0043dfb7-9a98-4b7c-9c04-5afc05580843:96530555-3568-468c-9bb4-ddd77278f1b2
5 Pass1 0043dfb7-9a98-4b7c-9c04-5afc05580843:96530555-3568-468c-9bb4-ddd77278f1b2
6 Pass1 0043dfb7-9a98-4b7c-9c04-5afc05580843:96530555-3568-468c-9bb4-ddd77278f1b2
refresh_key
1 7.125
2 7.125
3 7.125
4 7.125
5 7.125
6 7.142
structure(list(transId_app = c("c3bca1af-ed0d-4403-9552-29758055f7a3",
"68f85148-6b75-49dc-90f9-5bb66b6a750b", "68f85148-6b75-49dc-90f9-5bb66b6a750b",
"68f85148-6b75-49dc-90f9-5bb66b6a750b", "68f85148-6b75-49dc-90f9-5bb66b6a750b",
"7a0c53a3-00b4-4b81-8238-24a738e5f4ed", "7a0c53a3-00b4-4b81-8238-24a738e5f4ed",
"7a0c53a3-00b4-4b81-8238-24a738e5f4ed", "7a0c53a3-00b4-4b81-8238-24a738e5f4ed"
), mode = c("None", "None", "None", "None", "None", "None", "None",
"None", "None"), sm_bdt = c("21MAR2016:07:07:56.611", "21MAR2016:07:07:56.940",
"21MAR2016:07:07:56.940", "21MAR2016:07:07:56.940", "21MAR2016:07:07:56.924",
"21MAR2016:07:08:33.003", "21MAR2016:07:08:33.003", "21MAR2016:07:08:32.988",
"21MAR2016:07:08:32.957"), sm_edt = c("21MAR2016:07:07:56.627",
"21MAR2016:07:07:56.940", "21MAR2016:07:07:56.955", "21MAR2016:07:07:56.940",
"21MAR2016:07:07:56.940", "21MAR2016:07:08:33.003", "21MAR2016:07:08:33.003",
"21MAR2016:07:08:33.003", "21MAR2016:07:08:33.003"), mode_ = c("Pass1",
"Pass1", "Pass1", "Pass1", "Pass1", "Pass1", "Pass1", "Pass1",
"Pass1"), cridsessid = c("0043dfb7-9a98-4b7c-9c04-5afc05580843:96530555-3568-468c-9bb4-ddd77278f1b2",
"0043dfb7-9a98-4b7c-9c04-5afc05580843:96530555-3568-468c-9bb4-ddd77278f1b2",
"0043dfb7-9a98-4b7c-9c04-5afc05580843:96530555-3568-468c-9bb4-ddd77278f1b2",
"0043dfb7-9a98-4b7c-9c04-5afc05580843:96530555-3568-468c-9bb4-ddd77278f1b2",
"0043dfb7-9a98-4b7c-9c04-5afc05580843:96530555-3568-468c-9bb4-ddd77278f1b2",
"0043dfb7-9a98-4b7c-9c04-5afc05580843:96530555-3568-468c-9bb4-ddd77278f1b2",
"0043dfb7-9a98-4b7c-9c04-5afc05580843:96530555-3568-468c-9bb4-ddd77278f1b2",
"0043dfb7-9a98-4b7c-9c04-5afc05580843:96530555-3568-468c-9bb4-ddd77278f1b2",
"0043dfb7-9a98-4b7c-9c04-5afc05580843:96530555-3568-468c-9bb4-ddd77278f1b2"
), refresh_key = c(7.125, 7.125, 7.125, 7.125, 7.125, 7.142,
7.142, 7.142, 7.142)), .Names = c("transId_app", "mode", "sm_bdt",
"sm_edt", "mode_", "cridsessid", "refresh_key"), class = "data.frame", row.names = c(NA,
9L))
df2'ye ait dput() çıktısı:
structure(list(mode = c("None", "None", "LazyLoadUncached", "None",
"LazyLoadUncached", "None"), cridsessid = c("0043dfb7-9a98-4b7c-9c04-5afc05580843:96530555-3568-468c-9bb4-ddd77278f1b2",
"0043dfb7-9a98-4b7c-9c04-5afc05580843:96530555-3568-468c-9bb4-ddd77278f1b2",
"00e8a4c5-904d-46a4-a7b1-e4e6383bdd10:bf97d617-fbe8-4c4d-ab0e-c00d48bce120",
"00e8a4c5-904d-46a4-a7b1-e4e6383bdd10:bf97d617-fbe8-4c4d-ab0e-c00d48bce120",
"02063ca4-ccf3-4326-b87d-fe5ab13d2d7f:07783670-5ace-47bc-a707-db5b8064e241",
"02063ca4-ccf3-4326-b87d-fe5ab13d2d7f:07783670-5ace-47bc-a707-db5b8064e241"
), refresh_key_ = c("7.142+0", "7.142+0", "0.317+0", "0.317+0",
"5.658+17", "5.658+17"), refresh_key = c(7.142, 7.142, 0.317,
0.317, 5.658, 5.658), refresh_key2 = c(7.142, 7.142, 0.317, 0.317,
5.658, 5.658)), .Names = c("mode", "cridsessid", "refresh_key_",
"refresh_key", "refresh_key2"), row.names = c(NA, 6L), class = "data.frame")
df3
transId_app mode sm_bdt sm_edt
1 7a0c53a3-00b4-4b81-8238-24a738e5f4ed None 21MAR2016:07:08:33.003 21MAR2016:07:08:33.003
2 7a0c53a3-00b4-4b81-8238-24a738e5f4ed None 21MAR2016:07:08:33.003 21MAR2016:07:08:33.003
3 7a0c53a3-00b4-4b81-8238-24a738e5f4ed None 21MAR2016:07:08:32.988 21MAR2016:07:08:33.003
4 7a0c53a3-00b4-4b81-8238-24a738e5f4ed None 21MAR2016:07:08:32.957 21MAR2016:07:08:33.003
mode_ cridsessid
1 Pass1 0043dfb7-9a98-4b7c-9c04-5afc05580843:96530555-3568-468c-9bb4-ddd77278f1b2:7.142
2 Pass1 0043dfb7-9a98-4b7c-9c04-5afc05580843:96530555-3568-468c-9bb4-ddd77278f1b2:7.142
3 Pass1 0043dfb7-9a98-4b7c-9c04-5afc05580843:96530555-3568-468c-9bb4-ddd77278f1b2:7.142
4 Pass1 0043dfb7-9a98-4b7c-9c04-5afc05580843:96530555-3568-468c-9bb4-ddd77278f1b2:7.142
refresh_key refresh_key_ refresh_key2
1 7.142 7.142+0 7.142
2 7.142 7.142+0 7.142
3 7.142 7.142+0 7.142
4 7.142 7.142+0 7.142
df2 şuna benzer:
mode
1 None
2 None
3 LazyLoadUncached
4 None
5 LazyLoadUncached
6 None
cridsessid refresh_key_
1 0043dfb7-9a98-4b7c-9c04-5afc05580843:96530555-3568-468c-9bb4-ddd77278f1b2 7.142+0
2 0043dfb7-9a98-4b7c-9c04-5afc05580843:96530555-3568-468c-9bb4-ddd77278f1b2 7.142+0
3 00e8a4c5-904d-46a4-a7b1-e4e6383bdd10:bf97d617-fbe8-4c4d-ab0e-c00d48bce120 0.317+0
4 00e8a4c5-904d-46a4-a7b1-e4e6383bdd10:bf97d617-fbe8-4c4d-ab0e-c00d48bce120 0.317+0
5 02063ca4-ccf3-4326-b87d-fe5ab13d2d7f:07783670-5ace-47bc-a707-db5b8064e241 5.658+17
6 02063ca4-ccf3-4326-b87d-fe5ab13d2d7f:07783670-5ace-47bc-a707-db5b8064e241 5.658+17
refresh_key refresh_key2
1 7.142 7.142
2 7.142 7.142
3 0.317 0.317
4 0.317 0.317
5 5.658 5.658
6 5.658 5.658
structure(list(transId_app = c("7a0c53a3-00b4-4b81-8238-24a738e5f4ed",
"7a0c53a3-00b4-4b81-8238-24a738e5f4ed", "7a0c53a3-00b4-4b81-8238-24a738e5f4ed",
"7a0c53a3-00b4-4b81-8238-24a738e5f4ed"), mode = c("None", "None",
"None", "None"), sm_bdt = c("21MAR2016:07:08:33.003", "21MAR2016:07:08:33.003",
"21MAR2016:07:08:32.988", "21MAR2016:07:08:32.957"), sm_edt = c("21MAR2016:07:08:33.003",
"21MAR2016:07:08:33.003", "21MAR2016:07:08:33.003", "21MAR2016:07:08:33.003"
), mode_ = c("Pass1", "Pass1", "Pass1", "Pass1"), cridsessid = c("0043dfb7-9a98-4b7c-9c04-5afc05580843:96530555-3568-468c-9bb4-ddd77278f1b2:7.142",
"0043dfb7-9a98-4b7c-9c04-5afc05580843:96530555-3568-468c-9bb4-ddd77278f1b2:7.142",
"0043dfb7-9a98-4b7c-9c04-5afc05580843:96530555-3568-468c-9bb4-ddd77278f1b2:7.142",
"0043dfb7-9a98-4b7c-9c04-5afc05580843:96530555-3568-468c-9bb4-ddd77278f1b2:7.142"
), refresh_key = c(7.142, 7.142, 7.142, 7.142), refresh_key_ = c("7.142+0",
"7.142+0", "7.142+0", "7.142+0"), refresh_key2 = c(7.142, 7.142,
7.142, 7.142)), .Names = c("transId_app", "mode", "sm_bdt", "sm_edt",
"mode_", "cridsessid", "refresh_key", "refresh_key_", "refresh_key2"
), row.names = c(NA, 4L), class = "data.frame")
dplyr içindeki semi_join() fonksiyonunun farkındayım ve if xx and yy ifadesinin yerini etkili biçimde alabileceğini düşünüyorum; ancak semi_join() bir data.frame döndürdüğü için, aşağıdaki gibi bir şey yapmakta kullanabileceğim bir mantıksal vektörü nasıl elde edeceğimden emin değilim:
df <- full_join(df1, df2, by="cridsessid", "mode", "refresh_key")
x <- {logical vector}
df[x, "cridsessid"] <- df[x, paste("cridsessid", "mode", sep=":")]
Lütfen bir [MCVE] ve beklenen çıktıyı verin. Örnekte 'cridsessid' için hiç eşleşme yok; ayrıca örnek,
çalışan bir R oturumuna yapıştırılabilmesi için 'dput' ile üretilmiş olmalıydı. –
dput() çıktısını ekledim. Teşekkür ederim. Dediğim gibi, hâlâ öğreniyorum :) – sosukeinu