datasus_db.datasources.sih_rd
1import polars as pl 2import logging 3from ..pl_utils import to_schema, Column, DateColumn 4from ..datasus import import_from_ftp 5from ..utils import format_year, format_month 6from ..ftp import fetch_dbc_as_df 7 8MAIN_TABLE = "SIH_RD" 9 10 11def import_sih_rd(db_file="datasus.db", years=["*"], states=["*"], months=["*"]): 12 """Import RD (Autorização de Internação Hospitalar Reduzida) from SIMSUS (Sistema de Informações Hospitalares do SUS). 13 14 Args: 15 db_file (str, optional): path to the duckdb file in which the data will be imported to. Defaults to "datasus.db". 16 years (list, optional): list of years for which data will be imported (if available). Eg: `[2012, 2000, 2010]`. Defaults to ["*"]. 17 states (list, optional): list of brazilian 2 letters state for which data will be imported (if available). Eg: `["SP", "RJ"]`. Defaults to ["*"]. 18 months (list, optional): list of months numbers (1-12) for which data will be imported (if available). Eg: `[1, 12, 6]`. Defaults to ["*"]. 19 20 --- 21 22 Extra: 23 - **Data description**: https://github.com/mymatsubara/datasus-db/blob/main/docs/sih_rd.pdf 24 - **ftp path**: ftp.datasus.gov.br/dissemin/publicos/SIHSUS/200801_/Dados/RD*.dbc 25 """ 26 logging.info(f"⏳ [{MAIN_TABLE}] Starting import...") 27 28 import_from_ftp( 29 [MAIN_TABLE], 30 [ 31 f"/dissemin/publicos/SIHSUS/200801_/Dados/RD{state.upper()}{format_year(year)}{format_month(month)}.dbc*" 32 for year in years 33 for state in states 34 for month in months 35 ], 36 fetch_sih_rh, 37 db_file=db_file, 38 ) 39 40 41def fetch_sih_rh(ftp_path: str): 42 df = fetch_dbc_as_df(ftp_path) 43 return {MAIN_TABLE: map_sih_rd(df)} 44 45 46def map_sih_rd(df: pl.DataFrame): 47 df = df.with_columns( 48 pl.when(pl.col(pl.Utf8).str.len_chars() == 0) 49 .then(None) 50 .otherwise(pl.col(pl.Utf8)) 51 .name.keep(), 52 ).with_columns( 53 pl.when(pl.col("GESTOR_CPF").str.contains("[1-9]")) 54 .then(pl.col("GESTOR_CPF")) 55 .otherwise(None) 56 .name.keep(), 57 pl.when(pl.col("INSC_PN").str.contains("[1-9]")) 58 .then(pl.col("INSC_PN")) 59 .otherwise(None) 60 .name.keep(), 61 ) 62 63 return to_schema( 64 df, 65 [ 66 Column("UF_ZI", pl.Utf8), 67 Column("ANO_CMPT", pl.Utf8), 68 Column("MES_CMPT", pl.Utf8), 69 Column("ESPEC", pl.Utf8), 70 Column("CGC_HOSP", pl.Utf8), 71 Column("N_AIH", pl.Utf8), 72 Column("IDENT", pl.Utf8), 73 Column("CEP", pl.Utf8), 74 Column("MUNIC_RES", pl.Utf8), 75 Column("NASC", pl.Utf8), 76 Column("SEXO", pl.Utf8), 77 Column("UTI_MES_IN", pl.Utf8), 78 Column("UTI_MES_AN", pl.Utf8), 79 Column("UTI_MES_AL", pl.Utf8), 80 Column("UTI_MES_TO", pl.Utf8), 81 Column("UTI_INT_IN", pl.Utf8), 82 Column("UTI_INT_AN", pl.Utf8), 83 Column("UTI_INT_AL", pl.Utf8), 84 Column("UTI_INT_TO", pl.Utf8), 85 Column("DIAR_ACOM", pl.Utf8), 86 Column("QT_DIARIAS", pl.Utf8), 87 Column("PROC_SOLIC", pl.Utf8), 88 Column("PROC_REA", pl.Utf8), 89 Column("VAL_SH", pl.Float64), 90 Column("VAL_SP", pl.Float64), 91 Column("VAL_SADT", pl.Float64), 92 Column("VAL_RN", pl.Float64), 93 Column("VAL_ACOMP", pl.Float64), 94 Column("VAL_ORTP", pl.Float64), 95 Column("VAL_SANGUE", pl.Float64), 96 Column("VAL_SADTSR", pl.Float64), 97 Column("VAL_TRANSP", pl.Float64), 98 Column("VAL_OBSANG", pl.Float64), 99 Column("VAL_PED1AC", pl.Float64), 100 Column("VAL_TOT", pl.Float64), 101 Column("VAL_UTI", pl.Float64), 102 Column("US_TOT", pl.Float64), 103 Column("DT_INTER", pl.Utf8), 104 Column("DT_SAIDA", pl.Utf8), 105 Column("DIAG_PRINC", pl.Utf8), 106 Column("DIAG_SECUN", pl.Utf8), 107 Column("COBRANCA", pl.Utf8), 108 Column("NATUREZA", pl.Utf8), 109 Column("NAT_JUR", pl.Utf8), 110 Column("GESTAO", pl.Utf8), 111 Column("RUBRICA", pl.Utf8), 112 Column("IND_VDRL", pl.Utf8), 113 Column("MUNIC_MOV", pl.Utf8), 114 Column("COD_IDADE", pl.Utf8), 115 Column("IDADE", pl.Utf8, strict=False), 116 Column("DIAS_PERM", pl.Utf8), 117 Column("MORTE", pl.Utf8), 118 Column("NACIONAL", pl.Utf8), 119 Column("NUM_PROC", pl.Utf8), 120 Column("CAR_INT", pl.Utf8), 121 Column("TOT_PT_SP", pl.Utf8), 122 Column("CPF_AUT", pl.Utf8), 123 Column("HOMONIMO", pl.Utf8), 124 Column("NUM_FILHOS", pl.Utf8), 125 Column("INSTRU", pl.Utf8), 126 Column("CID_NOTIF", pl.Utf8), 127 Column("CONTRACEP1", pl.Utf8), 128 Column("CONTRACEP2", pl.Utf8), 129 Column("GESTRISCO", pl.Utf8), 130 Column("INSC_PN", pl.Utf8), 131 Column("SEQ_AIH5", pl.Utf8), 132 Column("CBOR", pl.Utf8), 133 Column("CNAER", pl.Utf8), 134 Column("VINCPREV", pl.Utf8), 135 Column("GESTOR_COD", pl.Utf8), 136 Column("GESTOR_TP", pl.Utf8), 137 Column("GESTOR_CPF", pl.Utf8), 138 Column("GESTOR_DT", pl.Utf8), 139 Column("CNES", pl.Utf8), 140 Column("CNPJ_MANT", pl.Utf8), 141 Column("INFEHOSP", pl.Utf8), 142 Column("CID_ASSO", pl.Utf8), 143 Column("CID_MORTE", pl.Utf8), 144 Column("COMPLEX", pl.UInt8), 145 Column("FINANC", pl.UInt8), 146 Column("FAEC_TP", pl.Utf8), 147 Column("REGCT", pl.Utf8), 148 Column("RACA_COR", pl.UInt8), 149 Column("ETNIA", pl.Utf8), 150 Column("SEQUENCIA", pl.Utf8), 151 Column("REMESSA", pl.Utf8), 152 Column("AUD_JUST", pl.Utf8), 153 Column("SIS_JUST", pl.Utf8), 154 Column("VAL_SH_FED", pl.Float64), 155 Column("VAL_SP_FED", pl.Float64), 156 Column("VAL_SH_GES", pl.Float64), 157 Column("VAL_SP_GES", pl.Float64), 158 Column("VAL_UCI", pl.Float64), 159 Column("MARCA_UCI", pl.UInt8), 160 Column("DIAGSEC1", pl.Utf8), 161 Column("DIAGSEC2", pl.Utf8), 162 Column("DIAGSEC3", pl.Utf8), 163 Column("DIAGSEC4", pl.Utf8), 164 Column("DIAGSEC5", pl.Utf8), 165 Column("DIAGSEC6", pl.Utf8), 166 Column("DIAGSEC7", pl.Utf8), 167 Column("DIAGSEC8", pl.Utf8), 168 Column("DIAGSEC9", pl.Utf8), 169 Column("TPDISEC1", pl.UInt8), 170 Column("TPDISEC2", pl.UInt8), 171 Column("TPDISEC3", pl.UInt8), 172 Column("TPDISEC4", pl.UInt8), 173 Column("TPDISEC5", pl.UInt8), 174 Column("TPDISEC6", pl.UInt8), 175 Column("TPDISEC7", pl.UInt8), 176 Column("TPDISEC8", pl.UInt8), 177 Column("TPDISEC9", pl.UInt8), 178 ], 179 )
MAIN_TABLE =
'SIH_RD'
def
import_sih_rd(db_file='datasus.db', years=['*'], states=['*'], months=['*']):
12def import_sih_rd(db_file="datasus.db", years=["*"], states=["*"], months=["*"]): 13 """Import RD (Autorização de Internação Hospitalar Reduzida) from SIMSUS (Sistema de Informações Hospitalares do SUS). 14 15 Args: 16 db_file (str, optional): path to the duckdb file in which the data will be imported to. Defaults to "datasus.db". 17 years (list, optional): list of years for which data will be imported (if available). Eg: `[2012, 2000, 2010]`. Defaults to ["*"]. 18 states (list, optional): list of brazilian 2 letters state for which data will be imported (if available). Eg: `["SP", "RJ"]`. Defaults to ["*"]. 19 months (list, optional): list of months numbers (1-12) for which data will be imported (if available). Eg: `[1, 12, 6]`. Defaults to ["*"]. 20 21 --- 22 23 Extra: 24 - **Data description**: https://github.com/mymatsubara/datasus-db/blob/main/docs/sih_rd.pdf 25 - **ftp path**: ftp.datasus.gov.br/dissemin/publicos/SIHSUS/200801_/Dados/RD*.dbc 26 """ 27 logging.info(f"⏳ [{MAIN_TABLE}] Starting import...") 28 29 import_from_ftp( 30 [MAIN_TABLE], 31 [ 32 f"/dissemin/publicos/SIHSUS/200801_/Dados/RD{state.upper()}{format_year(year)}{format_month(month)}.dbc*" 33 for year in years 34 for state in states 35 for month in months 36 ], 37 fetch_sih_rh, 38 db_file=db_file, 39 )
Import RD (Autorização de Internação Hospitalar Reduzida) from SIMSUS (Sistema de Informações Hospitalares do SUS).
Arguments:
- db_file (str, optional): path to the duckdb file in which the data will be imported to. Defaults to "datasus.db".
- years (list, optional): list of years for which data will be imported (if available). Eg:
[2012, 2000, 2010]
. Defaults to ["*"]. - states (list, optional): list of brazilian 2 letters state for which data will be imported (if available). Eg:
["SP", "RJ"]
. Defaults to ["*"]. - months (list, optional): list of months numbers (1-12) for which data will be imported (if available). Eg:
[1, 12, 6]
. Defaults to ["*"].
Extra:
- Data description: https://github.com/mymatsubara/datasus-db/blob/main/docs/sih_rd.pdf
- ftp path: ftp.datasus.gov.br/dissemin/publicos/SIHSUS/200801_/Dados/RD*.dbc
def
fetch_sih_rh(ftp_path: str):
def
map_sih_rd(df: polars.dataframe.frame.DataFrame):
47def map_sih_rd(df: pl.DataFrame): 48 df = df.with_columns( 49 pl.when(pl.col(pl.Utf8).str.len_chars() == 0) 50 .then(None) 51 .otherwise(pl.col(pl.Utf8)) 52 .name.keep(), 53 ).with_columns( 54 pl.when(pl.col("GESTOR_CPF").str.contains("[1-9]")) 55 .then(pl.col("GESTOR_CPF")) 56 .otherwise(None) 57 .name.keep(), 58 pl.when(pl.col("INSC_PN").str.contains("[1-9]")) 59 .then(pl.col("INSC_PN")) 60 .otherwise(None) 61 .name.keep(), 62 ) 63 64 return to_schema( 65 df, 66 [ 67 Column("UF_ZI", pl.Utf8), 68 Column("ANO_CMPT", pl.Utf8), 69 Column("MES_CMPT", pl.Utf8), 70 Column("ESPEC", pl.Utf8), 71 Column("CGC_HOSP", pl.Utf8), 72 Column("N_AIH", pl.Utf8), 73 Column("IDENT", pl.Utf8), 74 Column("CEP", pl.Utf8), 75 Column("MUNIC_RES", pl.Utf8), 76 Column("NASC", pl.Utf8), 77 Column("SEXO", pl.Utf8), 78 Column("UTI_MES_IN", pl.Utf8), 79 Column("UTI_MES_AN", pl.Utf8), 80 Column("UTI_MES_AL", pl.Utf8), 81 Column("UTI_MES_TO", pl.Utf8), 82 Column("UTI_INT_IN", pl.Utf8), 83 Column("UTI_INT_AN", pl.Utf8), 84 Column("UTI_INT_AL", pl.Utf8), 85 Column("UTI_INT_TO", pl.Utf8), 86 Column("DIAR_ACOM", pl.Utf8), 87 Column("QT_DIARIAS", pl.Utf8), 88 Column("PROC_SOLIC", pl.Utf8), 89 Column("PROC_REA", pl.Utf8), 90 Column("VAL_SH", pl.Float64), 91 Column("VAL_SP", pl.Float64), 92 Column("VAL_SADT", pl.Float64), 93 Column("VAL_RN", pl.Float64), 94 Column("VAL_ACOMP", pl.Float64), 95 Column("VAL_ORTP", pl.Float64), 96 Column("VAL_SANGUE", pl.Float64), 97 Column("VAL_SADTSR", pl.Float64), 98 Column("VAL_TRANSP", pl.Float64), 99 Column("VAL_OBSANG", pl.Float64), 100 Column("VAL_PED1AC", pl.Float64), 101 Column("VAL_TOT", pl.Float64), 102 Column("VAL_UTI", pl.Float64), 103 Column("US_TOT", pl.Float64), 104 Column("DT_INTER", pl.Utf8), 105 Column("DT_SAIDA", pl.Utf8), 106 Column("DIAG_PRINC", pl.Utf8), 107 Column("DIAG_SECUN", pl.Utf8), 108 Column("COBRANCA", pl.Utf8), 109 Column("NATUREZA", pl.Utf8), 110 Column("NAT_JUR", pl.Utf8), 111 Column("GESTAO", pl.Utf8), 112 Column("RUBRICA", pl.Utf8), 113 Column("IND_VDRL", pl.Utf8), 114 Column("MUNIC_MOV", pl.Utf8), 115 Column("COD_IDADE", pl.Utf8), 116 Column("IDADE", pl.Utf8, strict=False), 117 Column("DIAS_PERM", pl.Utf8), 118 Column("MORTE", pl.Utf8), 119 Column("NACIONAL", pl.Utf8), 120 Column("NUM_PROC", pl.Utf8), 121 Column("CAR_INT", pl.Utf8), 122 Column("TOT_PT_SP", pl.Utf8), 123 Column("CPF_AUT", pl.Utf8), 124 Column("HOMONIMO", pl.Utf8), 125 Column("NUM_FILHOS", pl.Utf8), 126 Column("INSTRU", pl.Utf8), 127 Column("CID_NOTIF", pl.Utf8), 128 Column("CONTRACEP1", pl.Utf8), 129 Column("CONTRACEP2", pl.Utf8), 130 Column("GESTRISCO", pl.Utf8), 131 Column("INSC_PN", pl.Utf8), 132 Column("SEQ_AIH5", pl.Utf8), 133 Column("CBOR", pl.Utf8), 134 Column("CNAER", pl.Utf8), 135 Column("VINCPREV", pl.Utf8), 136 Column("GESTOR_COD", pl.Utf8), 137 Column("GESTOR_TP", pl.Utf8), 138 Column("GESTOR_CPF", pl.Utf8), 139 Column("GESTOR_DT", pl.Utf8), 140 Column("CNES", pl.Utf8), 141 Column("CNPJ_MANT", pl.Utf8), 142 Column("INFEHOSP", pl.Utf8), 143 Column("CID_ASSO", pl.Utf8), 144 Column("CID_MORTE", pl.Utf8), 145 Column("COMPLEX", pl.UInt8), 146 Column("FINANC", pl.UInt8), 147 Column("FAEC_TP", pl.Utf8), 148 Column("REGCT", pl.Utf8), 149 Column("RACA_COR", pl.UInt8), 150 Column("ETNIA", pl.Utf8), 151 Column("SEQUENCIA", pl.Utf8), 152 Column("REMESSA", pl.Utf8), 153 Column("AUD_JUST", pl.Utf8), 154 Column("SIS_JUST", pl.Utf8), 155 Column("VAL_SH_FED", pl.Float64), 156 Column("VAL_SP_FED", pl.Float64), 157 Column("VAL_SH_GES", pl.Float64), 158 Column("VAL_SP_GES", pl.Float64), 159 Column("VAL_UCI", pl.Float64), 160 Column("MARCA_UCI", pl.UInt8), 161 Column("DIAGSEC1", pl.Utf8), 162 Column("DIAGSEC2", pl.Utf8), 163 Column("DIAGSEC3", pl.Utf8), 164 Column("DIAGSEC4", pl.Utf8), 165 Column("DIAGSEC5", pl.Utf8), 166 Column("DIAGSEC6", pl.Utf8), 167 Column("DIAGSEC7", pl.Utf8), 168 Column("DIAGSEC8", pl.Utf8), 169 Column("DIAGSEC9", pl.Utf8), 170 Column("TPDISEC1", pl.UInt8), 171 Column("TPDISEC2", pl.UInt8), 172 Column("TPDISEC3", pl.UInt8), 173 Column("TPDISEC4", pl.UInt8), 174 Column("TPDISEC5", pl.UInt8), 175 Column("TPDISEC6", pl.UInt8), 176 Column("TPDISEC7", pl.UInt8), 177 Column("TPDISEC8", pl.UInt8), 178 Column("TPDISEC9", pl.UInt8), 179 ], 180 )