# Export the distinct, non-null phone numbers from the users dump to a
# newline-separated text file.
df = pd.read_json("users_export_phone.json")
df.drop_duplicates(subset=["phone_number"], inplace=True)
df.dropna(subset=["phone_number"], inplace=True)

# Cast to str before joining so numeric-looking values do not break join().
txt_num = "\n".join(df["phone_number"].astype("str").tolist())
with open("phone_numbers.txt", "w") as file:
    file.write(txt_num)
# Collect every "article" JSON export under word_data/ into one frame.
df_all = pd.DataFrame()
for i in glob.iglob("word_data/*.json"):
    if "article" in i:
        df = pd.read_json(i)
        # Some exports name the word-count column "count"; normalise to "words".
        if "count" in df.columns:
            df.rename(columns={"count": "words"}, inplace=True)
        # NOTE(review): the flattened original does not show whether this
        # concat ran for every article file or only for those that had a
        # "count" column; it is assumed to run for every article file.
        df_all = pd.concat([df, df_all], ignore_index=True)

# Keep rows created after 2023-07-23 and total them per calendar month.
df_all["created"] = pd.to_datetime(df_all["created"])
df_all = df_all[df_all["created"] > "2023-07-23"]
df_all["month"] = df_all["created"].dt.month
df_all.groupby("month").agg({"complete": "sum", "words": "sum"})
complete | words | |
---|---|---|
month | ||
1 | 1972 | 1944453 |
7 | 508 | 663098 |
8 | 1310 | 1688996 |
9 | 1479 | 1944561 |
10 | 1804 | 2373311 |
11 | 1935 | 1904306 |
12 | 2876 | 2759498 |
# Subset of df_all created after the cutoff, then its largest "words" value.
# NOTE(review): "1-10-2023" is ambiguous (1 Oct vs 10 Jan); pandas' default
# month-first parsing would presumably read it as 10 January 2023 - confirm
# the intended date and prefer an ISO string such as "2023-10-01".
dd = df_all[df_all["created"] > "1-10-2023"]
dd["words"].max()
8839
# Total words per "week" and per "day" group of df_all.
# NOTE(review): df_month is grouped by "week", not by month - confirm whether
# the name or the key is wrong. The "week" and "day" columns are not created
# in the visible code.
df_month = df_all.groupby("week").agg({"words": "sum"}).reset_index()
df_day = df_all.groupby("day").agg({"words": "sum"}).reset_index()
# Mean "words" per calendar day of dd, then the mean of those daily means.
a = dd.groupby(pd.Grouper(key='created', freq='D')).agg({"words": "mean"})
print(a["words"].mean())
614.6506297601618
# Mean "words" per week of df_all, then the mean of those weekly means.
a = df_all.groupby(pd.Grouper(key='created', freq='W')).agg({"words": "mean"})
print(a["words"].mean())
591.223619913266
def weekinmonth(dates):
    """Get week number in a month.

    The first day of each date's month is located, and its weekday
    (Monday == 0) is used as an offset so that week numbers start at 1 and
    advance on every Monday.

    Parameters:
        dates (pd.Series): Series of dates. Values that are not already
            datetime-like (e.g. date strings) are converted with
            ``pd.to_datetime`` first.

    Returns:
        pd.Series: Week number in a month.
    """
    # A string/object column has no ``.dt`` accessor and would raise
    # "AttributeError: Can only use .dt accessor with datetimelike values".
    if not pd.api.types.is_datetime64_any_dtype(dates):
        dates = pd.to_datetime(dates)
    firstday_in_month = dates - pd.to_timedelta(dates.dt.day - 1, unit='d')
    return (dates.dt.day - 1 + firstday_in_month.dt.weekday) // 7 + 1
"created"]) weekinmonth(df[
AttributeError: Can only use .dt accessor with datetimelike values
# Load the header-less date sheet, drop empty rows, and rewrite the
# "/"-separated dates in column 0 with "-" separators.
df = pd.read_excel("Tarikh.xlsx", header=None)
df.dropna(inplace=True)
df[0] = df[0].apply(lambda x: str(x).replace("/", "-"))
df.iloc[0][0]
'1401/01/01'
"1401/01/01".replace("/", "")
# import re
# re.sub("\\", " ","1401/01/01")
'14010101'
df
0 | |
---|---|
0 | 1401-01-01 |
1 | 1401-01-02 |
2 | 1401-01-03 |
3 | 1401-01-04 |
4 | 1401-01-05 |
... | ... |
4573 | 1402-07-08 |
4574 | 1402-07-08 |
4575 | 1402-07-08 |
4576 | 1402-07-09 |
4577 | 1402-07-08 |
4412 rows × 1 columns
import jalali

# Sanity check: convert one "-"-separated Persian date to a Gregorian string.
jalali.Persian("1401-04-24").gregorian_string()
'2022-7-15'
def to_georgian(row):
    """Convert one Persian (Jalali) date value to a Gregorian date string.

    Parameters:
        row: A single cell value; it is passed through ``str()`` before being
            handed to ``jalali.Persian``.

    Returns:
        str: The result of ``gregorian_string()``, or the literal
        ``"BAD FORMAT"`` when the conversion raises.
    """
    try:
        return jalali.Persian(str(row)).gregorian_string()
    # Catch Exception rather than using a bare ``except`` so that
    # KeyboardInterrupt / SystemExit are not swallowed during a long apply().
    except Exception:
        return "BAD FORMAT"


# Convert every value of column 0 into a new "georgian" column.
df["georgian"] = df[0].apply(to_georgian)
# Load the events CSV export and look at the first "Time" value.
df = pd.read_csv("events-export-2920092-1697700606932.csv")
df["Time"].iloc[0]
1693584039.558
import json
# Load the events JSON export and look at its first record.
with open("events-export-2920092-1697700859686.json") as f:
    j = json.load(f)
j[0]
{'event': 'Create Purchase',
'properties': {'time': 1693584039.558,
'distinct_id': 'mehdiahmadvand123s@gmail.com',
'$insert_id': 'd1e5322e31ef4eb69e5d34080537e7f1',
'$lib_version': '4.10.0',
'$mp_api_endpoint': 'api.mixpanel.com',
'$mp_api_timestamp_ms': 1693584039627,
'Discount': None,
'Plan': 3112720570236792,
'Price': 100000,
'Status': 'paied',
'Words after purchase': 5000,
'Words before purchase': 5000,
'mp_lib': 'python',
'mp_processing_time_ms': 1693584039662},
'isExpanded': True}
import json
import glob
# List the JSON files in the working directory and load the fourth one.
dirs = list(glob.iglob("*.json"))
# NOTE(review): dirs[3] needs at least four matches, and glob does not
# guarantee a stable order, so which file is loaded may vary between runs.
with open(dirs[3]) as file:
    j = json.load(file)
import pandas as pd
import re
# Read one discount code per line and write them to a one-column Excel sheet.
with open("discount_codes.txt") as file:
    codes = file.readlines()
# Remove the newline characters that readlines() keeps on each entry.
codes = [x.replace("\n", "") for x in codes]
df = pd.DataFrame({"codes": codes})
df.to_excel("discount_codes.xlsx", index=False)
# Sample text with a leading and trailing newline, used to try out prefix
# stripping below.
a = """
html
asdad
html
"""
a.strip()
'html\nasdad\nhtml'
# Print the text with its first four characters removed when the stripped
# text begins with "html".
# NOTE(review): the test is made on a.strip() but the slice is taken from the
# unstripped `a`, so a leading newline shifts the cut by one character; slice
# a.strip() instead if the goal is to drop the "html" prefix.
if a.strip().startswith("html"):
    print(a[4:])
l
asdad
html