datasus_db.datasources.sih_rd

  1import polars as pl
  2import logging
  3from ..pl_utils import to_schema, Column, DateColumn
  4from ..datasus import import_from_ftp
  5from ..utils import format_year, format_month
  6from ..ftp import fetch_dbc_as_df
  7
  8MAIN_TABLE = "SIH_RD"
  9
 10
 11def import_sih_rd(db_file="datasus.db", years=["*"], states=["*"], months=["*"]):
 12    """Import RD (Autorização de Internação Hospitalar Reduzida) from SIMSUS (Sistema de Informações Hospitalares do SUS).
 13
 14    Args:
 15        db_file (str, optional): path to the duckdb file in which the data will be imported to. Defaults to "datasus.db".
 16        years (list, optional): list of years for which data will be imported (if available). Eg: `[2012, 2000, 2010]`. Defaults to ["*"].
 17        states (list, optional): list of brazilian 2 letters state for which data will be imported (if available). Eg: `["SP", "RJ"]`. Defaults to ["*"].
 18        months (list, optional): list of months numbers (1-12) for which data will be imported (if available). Eg: `[1, 12, 6]`. Defaults to ["*"].
 19
 20    ---
 21
 22    Extra:
 23    - **Data description**: https://github.com/mymatsubara/datasus-db/blob/main/docs/sih_rd.pdf
 24    - **ftp path**: ftp.datasus.gov.br/dissemin/publicos/SIHSUS/200801_/Dados/RD*.dbc
 25    """
 26    logging.info(f"⏳ [{MAIN_TABLE}] Starting import...")
 27
 28    import_from_ftp(
 29        [MAIN_TABLE],
 30        [
 31            f"/dissemin/publicos/SIHSUS/200801_/Dados/RD{state.upper()}{format_year(year)}{format_month(month)}.dbc*"
 32            for year in years
 33            for state in states
 34            for month in months
 35        ],
 36        fetch_sih_rh,
 37        db_file=db_file,
 38    )
 39
 40
 41def fetch_sih_rh(ftp_path: str):
 42    df = fetch_dbc_as_df(ftp_path)
 43    return {MAIN_TABLE: map_sih_rd(df)}
 44
 45
 46def map_sih_rd(df: pl.DataFrame):
 47    df = df.with_columns(
 48        pl.when(pl.col(pl.Utf8).str.len_chars() == 0)
 49        .then(None)
 50        .otherwise(pl.col(pl.Utf8))
 51        .name.keep(),
 52    ).with_columns(
 53        pl.when(pl.col("GESTOR_CPF").str.contains("[1-9]"))
 54        .then(pl.col("GESTOR_CPF"))
 55        .otherwise(None)
 56        .name.keep(),
 57        pl.when(pl.col("INSC_PN").str.contains("[1-9]"))
 58        .then(pl.col("INSC_PN"))
 59        .otherwise(None)
 60        .name.keep(),
 61    )
 62
 63    return to_schema(
 64        df,
 65        [
 66            Column("UF_ZI", pl.Utf8),
 67            Column("ANO_CMPT", pl.Utf8),
 68            Column("MES_CMPT", pl.Utf8),
 69            Column("ESPEC", pl.Utf8),
 70            Column("CGC_HOSP", pl.Utf8),
 71            Column("N_AIH", pl.Utf8),
 72            Column("IDENT", pl.Utf8),
 73            Column("CEP", pl.Utf8),
 74            Column("MUNIC_RES", pl.Utf8),
 75            Column("NASC", pl.Utf8),
 76            Column("SEXO", pl.Utf8),
 77            Column("UTI_MES_IN", pl.Utf8),
 78            Column("UTI_MES_AN", pl.Utf8),
 79            Column("UTI_MES_AL", pl.Utf8),
 80            Column("UTI_MES_TO", pl.Utf8),
 81            Column("UTI_INT_IN", pl.Utf8),
 82            Column("UTI_INT_AN", pl.Utf8),
 83            Column("UTI_INT_AL", pl.Utf8),
 84            Column("UTI_INT_TO", pl.Utf8),
 85            Column("DIAR_ACOM", pl.Utf8),
 86            Column("QT_DIARIAS", pl.Utf8),
 87            Column("PROC_SOLIC", pl.Utf8),
 88            Column("PROC_REA", pl.Utf8),
 89            Column("VAL_SH", pl.Float64),
 90            Column("VAL_SP", pl.Float64),
 91            Column("VAL_SADT", pl.Float64),
 92            Column("VAL_RN", pl.Float64),
 93            Column("VAL_ACOMP", pl.Float64),
 94            Column("VAL_ORTP", pl.Float64),
 95            Column("VAL_SANGUE", pl.Float64),
 96            Column("VAL_SADTSR", pl.Float64),
 97            Column("VAL_TRANSP", pl.Float64),
 98            Column("VAL_OBSANG", pl.Float64),
 99            Column("VAL_PED1AC", pl.Float64),
100            Column("VAL_TOT", pl.Float64),
101            Column("VAL_UTI", pl.Float64),
102            Column("US_TOT", pl.Float64),
103            Column("DT_INTER", pl.Utf8),
104            Column("DT_SAIDA", pl.Utf8),
105            Column("DIAG_PRINC", pl.Utf8),
106            Column("DIAG_SECUN", pl.Utf8),
107            Column("COBRANCA", pl.Utf8),
108            Column("NATUREZA", pl.Utf8),
109            Column("NAT_JUR", pl.Utf8),
110            Column("GESTAO", pl.Utf8),
111            Column("RUBRICA", pl.Utf8),
112            Column("IND_VDRL", pl.Utf8),
113            Column("MUNIC_MOV", pl.Utf8),
114            Column("COD_IDADE", pl.Utf8),
115            Column("IDADE", pl.Utf8, strict=False),
116            Column("DIAS_PERM", pl.Utf8),
117            Column("MORTE", pl.Utf8),
118            Column("NACIONAL", pl.Utf8),
119            Column("NUM_PROC", pl.Utf8),
120            Column("CAR_INT", pl.Utf8),
121            Column("TOT_PT_SP", pl.Utf8),
122            Column("CPF_AUT", pl.Utf8),
123            Column("HOMONIMO", pl.Utf8),
124            Column("NUM_FILHOS", pl.Utf8),
125            Column("INSTRU", pl.Utf8),
126            Column("CID_NOTIF", pl.Utf8),
127            Column("CONTRACEP1", pl.Utf8),
128            Column("CONTRACEP2", pl.Utf8),
129            Column("GESTRISCO", pl.Utf8),
130            Column("INSC_PN", pl.Utf8),
131            Column("SEQ_AIH5", pl.Utf8),
132            Column("CBOR", pl.Utf8),
133            Column("CNAER", pl.Utf8),
134            Column("VINCPREV", pl.Utf8),
135            Column("GESTOR_COD", pl.Utf8),
136            Column("GESTOR_TP", pl.Utf8),
137            Column("GESTOR_CPF", pl.Utf8),
138            Column("GESTOR_DT", pl.Utf8),
139            Column("CNES", pl.Utf8),
140            Column("CNPJ_MANT", pl.Utf8),
141            Column("INFEHOSP", pl.Utf8),
142            Column("CID_ASSO", pl.Utf8),
143            Column("CID_MORTE", pl.Utf8),
144            Column("COMPLEX", pl.UInt8),
145            Column("FINANC", pl.UInt8),
146            Column("FAEC_TP", pl.Utf8),
147            Column("REGCT", pl.Utf8),
148            Column("RACA_COR", pl.UInt8),
149            Column("ETNIA", pl.Utf8),
150            Column("SEQUENCIA", pl.Utf8),
151            Column("REMESSA", pl.Utf8),
152            Column("AUD_JUST", pl.Utf8),
153            Column("SIS_JUST", pl.Utf8),
154            Column("VAL_SH_FED", pl.Float64),
155            Column("VAL_SP_FED", pl.Float64),
156            Column("VAL_SH_GES", pl.Float64),
157            Column("VAL_SP_GES", pl.Float64),
158            Column("VAL_UCI", pl.Float64),
159            Column("MARCA_UCI", pl.UInt8),
160            Column("DIAGSEC1", pl.Utf8),
161            Column("DIAGSEC2", pl.Utf8),
162            Column("DIAGSEC3", pl.Utf8),
163            Column("DIAGSEC4", pl.Utf8),
164            Column("DIAGSEC5", pl.Utf8),
165            Column("DIAGSEC6", pl.Utf8),
166            Column("DIAGSEC7", pl.Utf8),
167            Column("DIAGSEC8", pl.Utf8),
168            Column("DIAGSEC9", pl.Utf8),
169            Column("TPDISEC1", pl.UInt8),
170            Column("TPDISEC2", pl.UInt8),
171            Column("TPDISEC3", pl.UInt8),
172            Column("TPDISEC4", pl.UInt8),
173            Column("TPDISEC5", pl.UInt8),
174            Column("TPDISEC6", pl.UInt8),
175            Column("TPDISEC7", pl.UInt8),
176            Column("TPDISEC8", pl.UInt8),
177            Column("TPDISEC9", pl.UInt8),
178        ],
179    )
MAIN_TABLE = 'SIH_RD'
def import_sih_rd(db_file='datasus.db', years=['*'], states=['*'], months=['*']):
12def import_sih_rd(db_file="datasus.db", years=["*"], states=["*"], months=["*"]):
13    """Import RD (Autorização de Internação Hospitalar Reduzida) from SIMSUS (Sistema de Informações Hospitalares do SUS).
14
15    Args:
16        db_file (str, optional): path to the duckdb file in which the data will be imported to. Defaults to "datasus.db".
17        years (list, optional): list of years for which data will be imported (if available). Eg: `[2012, 2000, 2010]`. Defaults to ["*"].
18        states (list, optional): list of brazilian 2 letters state for which data will be imported (if available). Eg: `["SP", "RJ"]`. Defaults to ["*"].
19        months (list, optional): list of months numbers (1-12) for which data will be imported (if available). Eg: `[1, 12, 6]`. Defaults to ["*"].
20
21    ---
22
23    Extra:
24    - **Data description**: https://github.com/mymatsubara/datasus-db/blob/main/docs/sih_rd.pdf
25    - **ftp path**: ftp.datasus.gov.br/dissemin/publicos/SIHSUS/200801_/Dados/RD*.dbc
26    """
27    logging.info(f"⏳ [{MAIN_TABLE}] Starting import...")
28
29    import_from_ftp(
30        [MAIN_TABLE],
31        [
32            f"/dissemin/publicos/SIHSUS/200801_/Dados/RD{state.upper()}{format_year(year)}{format_month(month)}.dbc*"
33            for year in years
34            for state in states
35            for month in months
36        ],
37        fetch_sih_rh,
38        db_file=db_file,
39    )

Import RD (Autorização de Internação Hospitalar Reduzida) from SIMSUS (Sistema de Informações Hospitalares do SUS).

Arguments:
  • db_file (str, optional): path to the duckdb file in which the data will be imported to. Defaults to "datasus.db".
  • years (list, optional): list of years for which data will be imported (if available). Eg: [2012, 2000, 2010]. Defaults to ["*"].
  • states (list, optional): list of brazilian 2 letters state for which data will be imported (if available). Eg: ["SP", "RJ"]. Defaults to ["*"].
  • months (list, optional): list of months numbers (1-12) for which data will be imported (if available). Eg: [1, 12, 6]. Defaults to ["*"].

Extra:

def fetch_sih_rh(ftp_path: str):
42def fetch_sih_rh(ftp_path: str):
43    df = fetch_dbc_as_df(ftp_path)
44    return {MAIN_TABLE: map_sih_rd(df)}
def map_sih_rd(df: polars.dataframe.frame.DataFrame):
 47def map_sih_rd(df: pl.DataFrame):
 48    df = df.with_columns(
 49        pl.when(pl.col(pl.Utf8).str.len_chars() == 0)
 50        .then(None)
 51        .otherwise(pl.col(pl.Utf8))
 52        .name.keep(),
 53    ).with_columns(
 54        pl.when(pl.col("GESTOR_CPF").str.contains("[1-9]"))
 55        .then(pl.col("GESTOR_CPF"))
 56        .otherwise(None)
 57        .name.keep(),
 58        pl.when(pl.col("INSC_PN").str.contains("[1-9]"))
 59        .then(pl.col("INSC_PN"))
 60        .otherwise(None)
 61        .name.keep(),
 62    )
 63
 64    return to_schema(
 65        df,
 66        [
 67            Column("UF_ZI", pl.Utf8),
 68            Column("ANO_CMPT", pl.Utf8),
 69            Column("MES_CMPT", pl.Utf8),
 70            Column("ESPEC", pl.Utf8),
 71            Column("CGC_HOSP", pl.Utf8),
 72            Column("N_AIH", pl.Utf8),
 73            Column("IDENT", pl.Utf8),
 74            Column("CEP", pl.Utf8),
 75            Column("MUNIC_RES", pl.Utf8),
 76            Column("NASC", pl.Utf8),
 77            Column("SEXO", pl.Utf8),
 78            Column("UTI_MES_IN", pl.Utf8),
 79            Column("UTI_MES_AN", pl.Utf8),
 80            Column("UTI_MES_AL", pl.Utf8),
 81            Column("UTI_MES_TO", pl.Utf8),
 82            Column("UTI_INT_IN", pl.Utf8),
 83            Column("UTI_INT_AN", pl.Utf8),
 84            Column("UTI_INT_AL", pl.Utf8),
 85            Column("UTI_INT_TO", pl.Utf8),
 86            Column("DIAR_ACOM", pl.Utf8),
 87            Column("QT_DIARIAS", pl.Utf8),
 88            Column("PROC_SOLIC", pl.Utf8),
 89            Column("PROC_REA", pl.Utf8),
 90            Column("VAL_SH", pl.Float64),
 91            Column("VAL_SP", pl.Float64),
 92            Column("VAL_SADT", pl.Float64),
 93            Column("VAL_RN", pl.Float64),
 94            Column("VAL_ACOMP", pl.Float64),
 95            Column("VAL_ORTP", pl.Float64),
 96            Column("VAL_SANGUE", pl.Float64),
 97            Column("VAL_SADTSR", pl.Float64),
 98            Column("VAL_TRANSP", pl.Float64),
 99            Column("VAL_OBSANG", pl.Float64),
100            Column("VAL_PED1AC", pl.Float64),
101            Column("VAL_TOT", pl.Float64),
102            Column("VAL_UTI", pl.Float64),
103            Column("US_TOT", pl.Float64),
104            Column("DT_INTER", pl.Utf8),
105            Column("DT_SAIDA", pl.Utf8),
106            Column("DIAG_PRINC", pl.Utf8),
107            Column("DIAG_SECUN", pl.Utf8),
108            Column("COBRANCA", pl.Utf8),
109            Column("NATUREZA", pl.Utf8),
110            Column("NAT_JUR", pl.Utf8),
111            Column("GESTAO", pl.Utf8),
112            Column("RUBRICA", pl.Utf8),
113            Column("IND_VDRL", pl.Utf8),
114            Column("MUNIC_MOV", pl.Utf8),
115            Column("COD_IDADE", pl.Utf8),
116            Column("IDADE", pl.Utf8, strict=False),
117            Column("DIAS_PERM", pl.Utf8),
118            Column("MORTE", pl.Utf8),
119            Column("NACIONAL", pl.Utf8),
120            Column("NUM_PROC", pl.Utf8),
121            Column("CAR_INT", pl.Utf8),
122            Column("TOT_PT_SP", pl.Utf8),
123            Column("CPF_AUT", pl.Utf8),
124            Column("HOMONIMO", pl.Utf8),
125            Column("NUM_FILHOS", pl.Utf8),
126            Column("INSTRU", pl.Utf8),
127            Column("CID_NOTIF", pl.Utf8),
128            Column("CONTRACEP1", pl.Utf8),
129            Column("CONTRACEP2", pl.Utf8),
130            Column("GESTRISCO", pl.Utf8),
131            Column("INSC_PN", pl.Utf8),
132            Column("SEQ_AIH5", pl.Utf8),
133            Column("CBOR", pl.Utf8),
134            Column("CNAER", pl.Utf8),
135            Column("VINCPREV", pl.Utf8),
136            Column("GESTOR_COD", pl.Utf8),
137            Column("GESTOR_TP", pl.Utf8),
138            Column("GESTOR_CPF", pl.Utf8),
139            Column("GESTOR_DT", pl.Utf8),
140            Column("CNES", pl.Utf8),
141            Column("CNPJ_MANT", pl.Utf8),
142            Column("INFEHOSP", pl.Utf8),
143            Column("CID_ASSO", pl.Utf8),
144            Column("CID_MORTE", pl.Utf8),
145            Column("COMPLEX", pl.UInt8),
146            Column("FINANC", pl.UInt8),
147            Column("FAEC_TP", pl.Utf8),
148            Column("REGCT", pl.Utf8),
149            Column("RACA_COR", pl.UInt8),
150            Column("ETNIA", pl.Utf8),
151            Column("SEQUENCIA", pl.Utf8),
152            Column("REMESSA", pl.Utf8),
153            Column("AUD_JUST", pl.Utf8),
154            Column("SIS_JUST", pl.Utf8),
155            Column("VAL_SH_FED", pl.Float64),
156            Column("VAL_SP_FED", pl.Float64),
157            Column("VAL_SH_GES", pl.Float64),
158            Column("VAL_SP_GES", pl.Float64),
159            Column("VAL_UCI", pl.Float64),
160            Column("MARCA_UCI", pl.UInt8),
161            Column("DIAGSEC1", pl.Utf8),
162            Column("DIAGSEC2", pl.Utf8),
163            Column("DIAGSEC3", pl.Utf8),
164            Column("DIAGSEC4", pl.Utf8),
165            Column("DIAGSEC5", pl.Utf8),
166            Column("DIAGSEC6", pl.Utf8),
167            Column("DIAGSEC7", pl.Utf8),
168            Column("DIAGSEC8", pl.Utf8),
169            Column("DIAGSEC9", pl.Utf8),
170            Column("TPDISEC1", pl.UInt8),
171            Column("TPDISEC2", pl.UInt8),
172            Column("TPDISEC3", pl.UInt8),
173            Column("TPDISEC4", pl.UInt8),
174            Column("TPDISEC5", pl.UInt8),
175            Column("TPDISEC6", pl.UInt8),
176            Column("TPDISEC7", pl.UInt8),
177            Column("TPDISEC8", pl.UInt8),
178            Column("TPDISEC9", pl.UInt8),
179        ],
180    )