datasus_db.datasources.po
import polars as pl
import logging
from ..pl_utils import to_schema, Column, DateColumn
from ..datasus import import_from_ftp
from ..utils import format_year
from ..ftp import fetch_dbc_as_df

# Name of the table that receives the imported PO records.
MAIN_TABLE = "PO"


def import_po(db_file="datasus.db", years=["*"]):
    """Import PO (Painel de Oncologia) data (since 2013).

    Builds one FTP path pattern per entry of `years` and hands them to
    `import_from_ftp`, together with `fetch_po` as the per-file fetcher.

    Args:
        db_file (str, optional): path to the duckdb file in which the data will be imported to. Defaults to "datasus.db".
        years (list, optional): list of years for which data will be imported (if available). Eg: `[2013, 2020]` Defaults to ["*"].

    ---

    Extra:
    - **Data description**: https://github.com/mymatsubara/datasus-db/blob/main/docs/po.pdf
    - **ftp path**: ftp.datasus.gov.br/dissemin/publicos/PAINEL_ONCOLOGIA/DADOS/POBR*.dbc
    """
    # NOTE: the list default is only iterated below, never mutated, so it is
    # kept as-is to leave the public signature unchanged.
    logging.info(f"⏳ [{MAIN_TABLE}] Starting import...")

    import_from_ftp(
        [MAIN_TABLE],
        [
            # One pattern per requested year; the trailing "*" is presumably a
            # wildcard resolved by import_from_ftp -- confirm in ..datasus.
            f"/dissemin/publicos/PAINEL_ONCOLOGIA/DADOS/POBR{format_year(year, digits=4)}.dbc*"
            for year in years
        ],
        fetch_po,
        db_file=db_file,
    )


def fetch_po(ftp_path: str):
    """Fetch the DBC file at `ftp_path` and return it mapped to the PO schema.

    Returns:
        dict: a single-entry mapping from `MAIN_TABLE` to the DataFrame
        produced by `map_po`.
    """
    df = fetch_dbc_as_df(ftp_path)
    return {MAIN_TABLE: map_po(df)}


def map_po(df: pl.DataFrame):
    """Turn empty strings into nulls and apply the PO column schema.

    Args:
        df (pl.DataFrame): raw PO data as returned by `fetch_dbc_as_df`.

    Returns:
        The result of `to_schema` applied to the cleaned frame and the PO
        column definitions listed below.
    """
    # Every Utf8 column: zero-length strings become null, other values are
    # kept; `.name.keep()` preserves each column's original name.
    df = df.with_columns(
        [
            pl.when(pl.col(pl.Utf8).str.len_chars() == 0)
            .then(None)
            .otherwise(pl.col(pl.Utf8))
            .name.keep(),
        ]
    )
    return to_schema(
        df,
        [
            Column("ANO_DIAGN", pl.UInt16),
            Column("ANOMES_DIA", pl.UInt32),
            Column("ANO_TRATAM", pl.UInt16),
            Column("ANOMES_TRA", pl.UInt32),
            Column("UF_RESID", pl.UInt8),
            Column("MUN_RESID", pl.UInt32),
            Column("UF_TRATAM", pl.UInt8),
            Column("MUN_TRATAM", pl.UInt32),
            Column("UF_DIAGN", pl.UInt8),
            Column("MUN_DIAG", pl.UInt32),
            Column("TRATAMENTO", pl.UInt8),
            Column("DIAGNOSTIC", pl.UInt8),
            # NOTE(review): strict=False presumably tolerates values that do
            # not fit UInt8 instead of raising -- confirm in ..pl_utils.
            Column("IDADE", pl.UInt8, strict=False),
            Column("SEXO", pl.Utf8),
            Column("ESTADIAM", pl.UInt8),
            Column("CNES_DIAG", pl.UInt32),
            Column("CNES_TRAT", pl.UInt32),
            Column("TEMPO_TRAT", pl.Utf8),
            Column("CNS_PAC", pl.Utf8),
            Column("DIAG_DETH", pl.Utf8),
            DateColumn("DT_DIAG", "%d/%m/%Y"),
            DateColumn("DT_TRAT", "%d/%m/%Y"),
            DateColumn("DT_NASC", "%d/%m/%Y"),
        ],
    )
MAIN_TABLE =
'PO'
def
import_po(db_file='datasus.db', years=['*']):
def import_po(db_file="datasus.db", years=["*"]):
    """Import PO (Painel de Oncologia) data (since 2013).

    Builds one FTP path pattern per entry of `years` and hands them to
    `import_from_ftp`, together with `fetch_po` as the per-file fetcher.

    Args:
        db_file (str, optional): path to the duckdb file in which the data will be imported to. Defaults to "datasus.db".
        years (list, optional): list of years for which data will be imported (if available). Eg: `[2013, 2020]` Defaults to ["*"].

    ---

    Extra:
    - **Data description**: https://github.com/mymatsubara/datasus-db/blob/main/docs/po.pdf
    - **ftp path**: ftp.datasus.gov.br/dissemin/publicos/PAINEL_ONCOLOGIA/DADOS/POBR*.dbc
    """
    # NOTE: the list default is only iterated below, never mutated, so it is
    # kept as-is to leave the public signature unchanged.
    logging.info(f"⏳ [{MAIN_TABLE}] Starting import...")

    import_from_ftp(
        [MAIN_TABLE],
        [
            # One pattern per requested year; the trailing "*" is presumably a
            # wildcard resolved by import_from_ftp -- confirm in ..datasus.
            f"/dissemin/publicos/PAINEL_ONCOLOGIA/DADOS/POBR{format_year(year, digits=4)}.dbc*"
            for year in years
        ],
        fetch_po,
        db_file=db_file,
    )
Import PO (Painel de Oncologia) data (since 2013).
Arguments:
- db_file (str, optional): path to the duckdb file in which the data will be imported to. Defaults to "datasus.db".
- years (list, optional): list of years for which data will be imported (if available). Eg:
[2013, 2020]
Defaults to ["*"].
Extra:
- Data description: https://github.com/mymatsubara/datasus-db/blob/main/docs/po.pdf
- ftp path: ftp.datasus.gov.br/dissemin/publicos/PAINEL_ONCOLOGIA/DADOS/POBR*.dbc
def
fetch_po(ftp_path: str):
def
map_po(df: polars.dataframe.frame.DataFrame):
def map_po(df: pl.DataFrame):
    """Normalise a raw PO frame: blank strings become null, then the PO schema is applied.

    Args:
        df (pl.DataFrame): raw PO data to be cleaned.

    Returns:
        Whatever `to_schema` returns for the cleaned frame and the PO column
        definitions declared in this function.
    """
    # Selector for all string-typed columns, reused in the condition and the
    # fallback branch.
    text_cols = pl.col(pl.Utf8)

    # Zero-length strings -> null; anything else is passed through. The
    # original column names are retained via `.name.keep()`.
    blank_to_null = (
        pl.when(text_cols.str.len_chars() == 0)
        .then(None)
        .otherwise(text_cols)
        .name.keep()
    )
    cleaned = df.with_columns([blank_to_null])

    # Year / year-month fields.
    period_columns = [
        Column("ANO_DIAGN", pl.UInt16),
        Column("ANOMES_DIA", pl.UInt32),
        Column("ANO_TRATAM", pl.UInt16),
        Column("ANOMES_TRA", pl.UInt32),
    ]
    # State (UF) and municipality codes.
    location_columns = [
        Column("UF_RESID", pl.UInt8),
        Column("MUN_RESID", pl.UInt32),
        Column("UF_TRATAM", pl.UInt8),
        Column("MUN_TRATAM", pl.UInt32),
        Column("UF_DIAGN", pl.UInt8),
        Column("MUN_DIAG", pl.UInt32),
    ]
    # Remaining coded / textual fields.
    record_columns = [
        Column("TRATAMENTO", pl.UInt8),
        Column("DIAGNOSTIC", pl.UInt8),
        # NOTE(review): strict=False presumably tolerates values that do not
        # fit UInt8 instead of raising -- confirm in ..pl_utils.
        Column("IDADE", pl.UInt8, strict=False),
        Column("SEXO", pl.Utf8),
        Column("ESTADIAM", pl.UInt8),
        Column("CNES_DIAG", pl.UInt32),
        Column("CNES_TRAT", pl.UInt32),
        Column("TEMPO_TRAT", pl.Utf8),
        Column("CNS_PAC", pl.Utf8),
        Column("DIAG_DETH", pl.Utf8),
    ]
    # Dates, all given the same day/month/year format string.
    date_format = "%d/%m/%Y"
    date_columns = [
        DateColumn("DT_DIAG", date_format),
        DateColumn("DT_TRAT", date_format),
        DateColumn("DT_NASC", date_format),
    ]

    # Concatenation keeps the exact column order of the schema.
    schema = period_columns + location_columns + record_columns + date_columns
    return to_schema(cleaned, schema)