Regex¶
[1]:
import pandas as pd
import numpy as np
import re
[2]:
# Source CSV for the posts used throughout this notebook.
aita_url = "https://raw.githubusercontent.com/roualdes/data/master/aita_clean_really_lightweight.csv"
df = pd.read_csv(aita_url)
# Replace missing post bodies with the empty string so the .str regex methods
# below return plain booleans instead of NaN for those rows.
# Assign the result back rather than calling fillna(inplace=True) on the
# df["body"] selection: that chained in-place form is deprecated and does not
# update df when pandas Copy-on-Write is active.
df["body"] = df["body"].fillna("")
[3]:
# Alternative (disabled): stream the CSV one row at a time instead of loading it whole.
# with pd.read_csv(aita_url, chunksize=1) as rdr:
#     for chunk in rdr:
#         print(chunk["id"])
[4]:
# Per-id number of matches for "starts with b and ends with q"; the anchors make
# this 0 or 1 per id, so the total is the number of such ids.
bq_hits = df["id"].str.count(r'^b.*q$')
bq_hits.sum()
[4]:
12
[5]:
# Boolean mask over the rows of df: True where the id begins with "b".
idxb = df["id"].str.match(r"^b.*")
# Display only the ids selected by the mask.
df.loc[idxb, "id"]
[5]:
2 bajsje
4 bz4m2k
7 bvxwjj
8 bn5hw7
22 bpszpy
...
993 bded2i
995 bpr3fg
996 b5gcz9
998 bkhy6s
999 bnkaf7
Name: id, Length: 324, dtype: object
[6]:
# Boolean mask over the rows of df: True where the id ends with "q".
idxq = df["id"].str.match(r".*q$")
# Display only the ids selected by the mask.
df.loc[idxq, "id"]
[6]:
95 devpkq
117 bk9abq
137 b5ugbq
142 chjtyq
149 dnqemq
199 dc0nlq
207 7xtfbq
209 b7aqeq
210 blegvq
294 cv2fmq
304 dri7xq
311 cx19xq
319 cfxu9q
334 cs1soq
353 bu0bbq
364 95yglq
372 ch5h3q
380 c6drhq
400 coqrxq
432 c24eeq
434 cl8nqq
445 86v4kq
446 bdljxq
487 dgqx3q
500 clr14q
506 5m7e4q
515 cumqkq
630 b7rl0q
633 7y29kq
648 bu80iq
662 bo863q
664 b9c0tq
687 dazejq
752 dqe0pq
791 bvjiuq
867 axbe8q
882 d801yq
977 btdv5q
Name: id, dtype: object
[7]:
# Union of the two masks: ids that start with "b" OR end with "q".
idx = idxb | idxq
df.loc[idx, "id"]
[7]:
2 bajsje
4 bz4m2k
7 bvxwjj
8 bn5hw7
22 bpszpy
...
993 bded2i
995 bpr3fg
996 b5gcz9
998 bkhy6s
999 bnkaf7
Name: id, Length: 350, dtype: object
[8]:
# Mean of the is_asshole column, shown next to its complement as a
# two-row summary table rounded to two decimals.
p = df['is_asshole'].mean()
data = {
    "Label": ["A-hole", "Not A-hole"],
    "Proportion": [p, 1 - p],
}
pd.DataFrame(data).round(2)
[8]:
Label | Proportion | |
---|---|---|
0 | A-hole | 0.24 |
1 | Not A-hole | 0.76 |
[4]:
# Boolean mask: True where the post body mentions "wife" or "girlfriend" as a
# whole word (first letter in either case).
# - str.contains searches the entire text; a `.*`-prefixed str.match stops at
#   the first newline because `.` does not match "\n", so it would only ever
#   look at the first paragraph of a post.
# - [Ww] / [Gg] instead of [W|w] / [G|g]: inside a character class `|` is a
#   literal pipe character, not alternation.
# - (?:...) is non-capturing, so pandas does not warn about match groups.
# - na=False keeps the mask strictly boolean even if a body is missing.
# NOTE: cells below that use jdx must be re-run after changing this mask.
jdx = df["body"].str.contains(r'\b(?:[Ww]ife|[Gg]irlfriend)\b', na=False)
[10]:
# Show the fourth flagged post. Filter with the boolean mask first, then pick
# by position with .iloc, so the lookup does not depend on df's index labels
# happening to equal row positions (np.where yields positions, .loc expects labels).
df.loc[jdx, "body"].iloc[3]
[10]:
"So I (27M) went for a drink with some friends (2 guys my age and also 2 girls). Just for a preliminary note, I have a girlfriend (26F) and so do the two guys. However our girlfriends weren't with us.\n\nNow anyway, as we were drinking one of the guy friends, we'll call Steve, said that he thinks he is ''in love'' with his girlfriend and that there's ''noone else in the world that he loves more than her''. Now we can call the other guy Joe and the other two girls Bethany and Katy. Joe congratulated him and Bethany and Katy said ''awwwh''. I congratulated him too that he's found a girl he really likes.\n\nThen they turned to me and asked whether I love my girlfriend. I said that I do, but I don't know why I said this, as I was drunk, I just blurted out ''I love her very much, I really do, but...there's noone in the whole world that I love more than myself. Not even my own mother.''\n\nThey looked at me startled and the girls said ''That's such a fucking douchey thing to say''. I quickly changed the topic, but when I think of it, I don't see the issue. It's socially acceptable for a guy to say he loves his girlfriend/wife more than anyone, or it's socially acceptable for a mother to say she loves her son more than anyone (or a father loves his daughter more than anyone). But it's douchey to say I love myself more than anyone?\n\nI 100% feel that I love myself more than anyone. I love myself more than my parents. More than my friends. More than my girlfriend. I don't see why people get triggered at this, AITA?"
[25]:
# Mask of posts labelled is_asshole == 1.
adx = df["is_asshole"] == 1
# Share of the jdx-flagged posts that also carry that label, to two decimals.
np.round((adx & jdx).sum() / jdx.sum(), 2)
[25]:
0.28