datasus_db.datasources.sia_pa
1import polars as pl 2import logging 3from ..pl_utils import to_schema, Column, DateColumn 4from ..datasus import import_from_ftp 5from ..utils import format_year, format_month 6from ..ftp import fetch_dbc_as_df 7 8MAIN_TABLE = "SIA_PA" 9 10 11def import_sia_pa(db_file="datasus.db", years=["*"], states=["*"], months=["*"]): 12 """Import PA (Produção Ambulatorial) from SIASUS (Sistema de Informações Ambulatorial do SUS). 13 14 Args: 15 db_file (str, optional): path to the duckdb file in which the data will be imported to. Defaults to "datasus.db". 16 years (list, optional): list of years for which data will be imported (if available). Eg: `[2012, 2000, 2010]`. Defaults to ["*"]. 17 states (list, optional): list of brazilian 2 letters state for which data will be imported (if available). Eg: `["SP", "RJ"]`. Defaults to ["*"]. 18 months (list, optional): list of months numbers (1-12) for which data will be imported (if available). Eg: `[1, 12, 6]`. Defaults to ["*"]. 19 20 --- 21 22 Extra: 23 - **Data description**: https://github.com/mymatsubara/datasus-db/blob/main/docs/sia_pa.pdf 24 - **ftp path**: ftp.datasus.gov.br/dissemin/publicos/SIASUS/200801_/Dados/PA*.dbc 25 """ 26 logging.info(f"⏳ [{MAIN_TABLE}] Starting import...") 27 import_from_ftp( 28 [MAIN_TABLE], 29 [ 30 f"/dissemin/publicos/SIASUS/200801_/Dados/PA{state.upper()}{format_year(year)}{format_month(month)}*.dbc" 31 for year in years 32 for state in states 33 for month in months 34 ], 35 fetch_sia_rh, 36 db_file=db_file, 37 ) 38 39 40def fetch_sia_rh(ftp_path: str): 41 df = fetch_dbc_as_df(ftp_path) 42 return {MAIN_TABLE: map_sia_pa(df)} 43 44 45def map_sia_pa(df: pl.DataFrame): 46 df = df.with_columns( 47 pl.when(pl.col(pl.Utf8).str.len_chars() == 0) 48 .then(None) 49 .otherwise(pl.col(pl.Utf8)) 50 .name.keep(), 51 ) 52 53 return to_schema( 54 df, 55 [ 56 Column("PA_CODUNI", pl.Utf8), 57 Column("PA_GESTAO", pl.Utf8), 58 Column("PA_CONDIC", pl.Utf8), 59 Column("PA_UFMUN", pl.Utf8), 60 Column("PA_REGCT", pl.Utf8), 61 Column("PA_INCOUT", pl.Utf8), 62 Column("PA_INCURG", pl.Utf8), 63 Column("PA_TPUPS", pl.Utf8), 64 Column("PA_TIPPRE", pl.Utf8), 65 Column("PA_MN_IND", pl.Utf8), 66 Column("PA_CNPJCPF", pl.Utf8), 67 Column("PA_CNPJMNT", pl.Utf8), 68 Column("PA_CNPJ_CC", pl.Utf8), 69 Column("PA_MVM", pl.Utf8), 70 Column("PA_CMP", pl.Utf8), 71 Column("PA_PROC_ID", pl.Utf8), 72 Column("PA_TPFIN", pl.Utf8), 73 Column("PA_SUBFIN", pl.Utf8), 74 Column("PA_NIVCPL", pl.Utf8), 75 Column("PA_DOCORIG", pl.Utf8), 76 Column("PA_AUTORIZ", pl.Utf8), 77 Column("PA_CNSMED", pl.Utf8), 78 Column("PA_CBOCOD", pl.Utf8), 79 Column("PA_MOTSAI", pl.Utf8), 80 Column("PA_OBITO", pl.Utf8), 81 Column("PA_ENCERR", pl.Utf8), 82 Column("PA_PERMAN", pl.Utf8), 83 Column("PA_ALTA", pl.Utf8), 84 Column("PA_TRANSF", pl.Utf8), 85 Column("PA_CIDPRI", pl.Utf8), 86 Column("PA_CIDSEC", pl.Utf8), 87 Column("PA_CIDCAS", pl.Utf8), 88 Column("PA_CATEND", pl.Utf8), 89 Column("PA_IDADE", pl.Utf8), 90 Column("IDADEMIN", pl.Utf8), 91 Column("IDADEMAX", pl.Utf8), 92 Column("PA_FLIDADE", pl.Utf8), 93 Column("PA_SEXO", pl.Utf8), 94 Column("PA_RACACOR", pl.Utf8), 95 Column("PA_MUNPCN", pl.Utf8), 96 Column("PA_QTDPRO", pl.Utf8), 97 Column("PA_QTDAPR", pl.Utf8), 98 Column("PA_VALPRO", pl.Utf8), 99 Column("PA_VALAPR", pl.Utf8), 100 Column("PA_UFDIF", pl.Utf8), 101 Column("PA_MNDIF", pl.Utf8), 102 Column("PA_DIF_VAL", pl.Utf8), 103 Column("NU_VPA_TOT", pl.Utf8), 104 Column("NU_PA_TOT", pl.Utf8), 105 Column("PA_INDICA", pl.Utf8), 106 Column("PA_CODOCO", pl.Utf8), 107 Column("PA_FLQT", pl.Utf8), 108 Column("PA_FLER", pl.Utf8), 109 Column("PA_ETNIA", pl.Utf8), 110 Column("PA_VL_CF", pl.Float64), 111 Column("PA_VL_CL", pl.Float64), 112 Column("PA_VL_INC", pl.Float64), 113 Column("PA_SRV_C", pl.Utf8), 114 Column("PA_INE", pl.Utf8), 115 Column("PA_NAT_JUR", pl.Utf8), 116 ], 117 )
MAIN_TABLE =
'SIA_PA'
def
import_sia_pa(db_file='datasus.db', years=['*'], states=['*'], months=['*']):
12def import_sia_pa(db_file="datasus.db", years=["*"], states=["*"], months=["*"]): 13 """Import PA (Produção Ambulatorial) from SIASUS (Sistema de Informações Ambulatorial do SUS). 14 15 Args: 16 db_file (str, optional): path to the duckdb file in which the data will be imported to. Defaults to "datasus.db". 17 years (list, optional): list of years for which data will be imported (if available). Eg: `[2012, 2000, 2010]`. Defaults to ["*"]. 18 states (list, optional): list of brazilian 2 letters state for which data will be imported (if available). Eg: `["SP", "RJ"]`. Defaults to ["*"]. 19 months (list, optional): list of months numbers (1-12) for which data will be imported (if available). Eg: `[1, 12, 6]`. Defaults to ["*"]. 20 21 --- 22 23 Extra: 24 - **Data description**: https://github.com/mymatsubara/datasus-db/blob/main/docs/sia_pa.pdf 25 - **ftp path**: ftp.datasus.gov.br/dissemin/publicos/SIASUS/200801_/Dados/PA*.dbc 26 """ 27 logging.info(f"⏳ [{MAIN_TABLE}] Starting import...") 28 import_from_ftp( 29 [MAIN_TABLE], 30 [ 31 f"/dissemin/publicos/SIASUS/200801_/Dados/PA{state.upper()}{format_year(year)}{format_month(month)}*.dbc" 32 for year in years 33 for state in states 34 for month in months 35 ], 36 fetch_sia_rh, 37 db_file=db_file, 38 )
Import PA (Produção Ambulatorial) from SIASUS (Sistema de Informações Ambulatorial do SUS).
Arguments:
- db_file (str, optional): path to the duckdb file in which the data will be imported to. Defaults to "datasus.db".
- years (list, optional): list of years for which data will be imported (if available). Eg:
[2012, 2000, 2010]
. Defaults to ["*"]. - states (list, optional): list of brazilian 2 letters state for which data will be imported (if available). Eg:
["SP", "RJ"]
. Defaults to ["*"]. - months (list, optional): list of months numbers (1-12) for which data will be imported (if available). Eg:
[1, 12, 6]
. Defaults to ["*"].
Extra:
- Data description: https://github.com/mymatsubara/datasus-db/blob/main/docs/sia_pa.pdf
- ftp path: ftp.datasus.gov.br/dissemin/publicos/SIASUS/200801_/Dados/PA*.dbc
def
fetch_sia_rh(ftp_path: str):
def
map_sia_pa(df: polars.dataframe.frame.DataFrame):
46def map_sia_pa(df: pl.DataFrame): 47 df = df.with_columns( 48 pl.when(pl.col(pl.Utf8).str.len_chars() == 0) 49 .then(None) 50 .otherwise(pl.col(pl.Utf8)) 51 .name.keep(), 52 ) 53 54 return to_schema( 55 df, 56 [ 57 Column("PA_CODUNI", pl.Utf8), 58 Column("PA_GESTAO", pl.Utf8), 59 Column("PA_CONDIC", pl.Utf8), 60 Column("PA_UFMUN", pl.Utf8), 61 Column("PA_REGCT", pl.Utf8), 62 Column("PA_INCOUT", pl.Utf8), 63 Column("PA_INCURG", pl.Utf8), 64 Column("PA_TPUPS", pl.Utf8), 65 Column("PA_TIPPRE", pl.Utf8), 66 Column("PA_MN_IND", pl.Utf8), 67 Column("PA_CNPJCPF", pl.Utf8), 68 Column("PA_CNPJMNT", pl.Utf8), 69 Column("PA_CNPJ_CC", pl.Utf8), 70 Column("PA_MVM", pl.Utf8), 71 Column("PA_CMP", pl.Utf8), 72 Column("PA_PROC_ID", pl.Utf8), 73 Column("PA_TPFIN", pl.Utf8), 74 Column("PA_SUBFIN", pl.Utf8), 75 Column("PA_NIVCPL", pl.Utf8), 76 Column("PA_DOCORIG", pl.Utf8), 77 Column("PA_AUTORIZ", pl.Utf8), 78 Column("PA_CNSMED", pl.Utf8), 79 Column("PA_CBOCOD", pl.Utf8), 80 Column("PA_MOTSAI", pl.Utf8), 81 Column("PA_OBITO", pl.Utf8), 82 Column("PA_ENCERR", pl.Utf8), 83 Column("PA_PERMAN", pl.Utf8), 84 Column("PA_ALTA", pl.Utf8), 85 Column("PA_TRANSF", pl.Utf8), 86 Column("PA_CIDPRI", pl.Utf8), 87 Column("PA_CIDSEC", pl.Utf8), 88 Column("PA_CIDCAS", pl.Utf8), 89 Column("PA_CATEND", pl.Utf8), 90 Column("PA_IDADE", pl.Utf8), 91 Column("IDADEMIN", pl.Utf8), 92 Column("IDADEMAX", pl.Utf8), 93 Column("PA_FLIDADE", pl.Utf8), 94 Column("PA_SEXO", pl.Utf8), 95 Column("PA_RACACOR", pl.Utf8), 96 Column("PA_MUNPCN", pl.Utf8), 97 Column("PA_QTDPRO", pl.Utf8), 98 Column("PA_QTDAPR", pl.Utf8), 99 Column("PA_VALPRO", pl.Utf8), 100 Column("PA_VALAPR", pl.Utf8), 101 Column("PA_UFDIF", pl.Utf8), 102 Column("PA_MNDIF", pl.Utf8), 103 Column("PA_DIF_VAL", pl.Utf8), 104 Column("NU_VPA_TOT", pl.Utf8), 105 Column("NU_PA_TOT", pl.Utf8), 106 Column("PA_INDICA", pl.Utf8), 107 Column("PA_CODOCO", pl.Utf8), 108 Column("PA_FLQT", pl.Utf8), 109 Column("PA_FLER", pl.Utf8), 110 Column("PA_ETNIA", pl.Utf8), 111 Column("PA_VL_CF", pl.Float64), 112 Column("PA_VL_CL", pl.Float64), 113 Column("PA_VL_INC", pl.Float64), 114 Column("PA_SRV_C", pl.Utf8), 115 Column("PA_INE", pl.Utf8), 116 Column("PA_NAT_JUR", pl.Utf8), 117 ], 118 )