datasus_db.datasources.sia_pa

  1import polars as pl
  2import logging
  3from ..pl_utils import to_schema, Column, DateColumn
  4from ..datasus import import_from_ftp
  5from ..utils import format_year, format_month
  6from ..ftp import fetch_dbc_as_df
  7
  8MAIN_TABLE = "SIA_PA"
  9
 10
 11def import_sia_pa(db_file="datasus.db", years=["*"], states=["*"], months=["*"]):
 12    """Import PA (Produção Ambulatorial) from SIASUS (Sistema de Informações Ambulatorial do SUS).
 13
 14    Args:
 15        db_file (str, optional): path to the duckdb file in which the data will be imported to. Defaults to "datasus.db".
 16        years (list, optional): list of years for which data will be imported (if available). Eg: `[2012, 2000, 2010]`. Defaults to ["*"].
 17        states (list, optional): list of brazilian 2 letters state for which data will be imported (if available). Eg: `["SP", "RJ"]`. Defaults to ["*"].
 18        months (list, optional): list of months numbers (1-12) for which data will be imported (if available). Eg: `[1, 12, 6]`. Defaults to ["*"].
 19
 20    ---
 21
 22    Extra:
 23    - **Data description**: https://github.com/mymatsubara/datasus-db/blob/main/docs/sia_pa.pdf
 24    - **ftp path**: ftp.datasus.gov.br/dissemin/publicos/SIASUS/200801_/Dados/PA*.dbc
 25    """
 26    logging.info(f"⏳ [{MAIN_TABLE}] Starting import...")
 27    import_from_ftp(
 28        [MAIN_TABLE],
 29        [
 30            f"/dissemin/publicos/SIASUS/200801_/Dados/PA{state.upper()}{format_year(year)}{format_month(month)}*.dbc"
 31            for year in years
 32            for state in states
 33            for month in months
 34        ],
 35        fetch_sia_rh,
 36        db_file=db_file,
 37    )
 38
 39
 40def fetch_sia_rh(ftp_path: str):
 41    df = fetch_dbc_as_df(ftp_path)
 42    return {MAIN_TABLE: map_sia_pa(df)}
 43
 44
 45def map_sia_pa(df: pl.DataFrame):
 46    df = df.with_columns(
 47        pl.when(pl.col(pl.Utf8).str.len_chars() == 0)
 48        .then(None)
 49        .otherwise(pl.col(pl.Utf8))
 50        .name.keep(),
 51    )
 52
 53    return to_schema(
 54        df,
 55        [
 56            Column("PA_CODUNI", pl.Utf8),
 57            Column("PA_GESTAO", pl.Utf8),
 58            Column("PA_CONDIC", pl.Utf8),
 59            Column("PA_UFMUN", pl.Utf8),
 60            Column("PA_REGCT", pl.Utf8),
 61            Column("PA_INCOUT", pl.Utf8),
 62            Column("PA_INCURG", pl.Utf8),
 63            Column("PA_TPUPS", pl.Utf8),
 64            Column("PA_TIPPRE", pl.Utf8),
 65            Column("PA_MN_IND", pl.Utf8),
 66            Column("PA_CNPJCPF", pl.Utf8),
 67            Column("PA_CNPJMNT", pl.Utf8),
 68            Column("PA_CNPJ_CC", pl.Utf8),
 69            Column("PA_MVM", pl.Utf8),
 70            Column("PA_CMP", pl.Utf8),
 71            Column("PA_PROC_ID", pl.Utf8),
 72            Column("PA_TPFIN", pl.Utf8),
 73            Column("PA_SUBFIN", pl.Utf8),
 74            Column("PA_NIVCPL", pl.Utf8),
 75            Column("PA_DOCORIG", pl.Utf8),
 76            Column("PA_AUTORIZ", pl.Utf8),
 77            Column("PA_CNSMED", pl.Utf8),
 78            Column("PA_CBOCOD", pl.Utf8),
 79            Column("PA_MOTSAI", pl.Utf8),
 80            Column("PA_OBITO", pl.Utf8),
 81            Column("PA_ENCERR", pl.Utf8),
 82            Column("PA_PERMAN", pl.Utf8),
 83            Column("PA_ALTA", pl.Utf8),
 84            Column("PA_TRANSF", pl.Utf8),
 85            Column("PA_CIDPRI", pl.Utf8),
 86            Column("PA_CIDSEC", pl.Utf8),
 87            Column("PA_CIDCAS", pl.Utf8),
 88            Column("PA_CATEND", pl.Utf8),
 89            Column("PA_IDADE", pl.Utf8),
 90            Column("IDADEMIN", pl.Utf8),
 91            Column("IDADEMAX", pl.Utf8),
 92            Column("PA_FLIDADE", pl.Utf8),
 93            Column("PA_SEXO", pl.Utf8),
 94            Column("PA_RACACOR", pl.Utf8),
 95            Column("PA_MUNPCN", pl.Utf8),
 96            Column("PA_QTDPRO", pl.Utf8),
 97            Column("PA_QTDAPR", pl.Utf8),
 98            Column("PA_VALPRO", pl.Utf8),
 99            Column("PA_VALAPR", pl.Utf8),
100            Column("PA_UFDIF", pl.Utf8),
101            Column("PA_MNDIF", pl.Utf8),
102            Column("PA_DIF_VAL", pl.Utf8),
103            Column("NU_VPA_TOT", pl.Utf8),
104            Column("NU_PA_TOT", pl.Utf8),
105            Column("PA_INDICA", pl.Utf8),
106            Column("PA_CODOCO", pl.Utf8),
107            Column("PA_FLQT", pl.Utf8),
108            Column("PA_FLER", pl.Utf8),
109            Column("PA_ETNIA", pl.Utf8),
110            Column("PA_VL_CF", pl.Float64),
111            Column("PA_VL_CL", pl.Float64),
112            Column("PA_VL_INC", pl.Float64),
113            Column("PA_SRV_C", pl.Utf8),
114            Column("PA_INE", pl.Utf8),
115            Column("PA_NAT_JUR", pl.Utf8),
116        ],
117    )
MAIN_TABLE = 'SIA_PA'
def import_sia_pa(db_file='datasus.db', years=['*'], states=['*'], months=['*']):
12def import_sia_pa(db_file="datasus.db", years=["*"], states=["*"], months=["*"]):
13    """Import PA (Produção Ambulatorial) from SIASUS (Sistema de Informações Ambulatorial do SUS).
14
15    Args:
16        db_file (str, optional): path to the duckdb file in which the data will be imported to. Defaults to "datasus.db".
17        years (list, optional): list of years for which data will be imported (if available). Eg: `[2012, 2000, 2010]`. Defaults to ["*"].
18        states (list, optional): list of brazilian 2 letters state for which data will be imported (if available). Eg: `["SP", "RJ"]`. Defaults to ["*"].
19        months (list, optional): list of months numbers (1-12) for which data will be imported (if available). Eg: `[1, 12, 6]`. Defaults to ["*"].
20
21    ---
22
23    Extra:
24    - **Data description**: https://github.com/mymatsubara/datasus-db/blob/main/docs/sia_pa.pdf
25    - **ftp path**: ftp.datasus.gov.br/dissemin/publicos/SIASUS/200801_/Dados/PA*.dbc
26    """
27    logging.info(f"⏳ [{MAIN_TABLE}] Starting import...")
28    import_from_ftp(
29        [MAIN_TABLE],
30        [
31            f"/dissemin/publicos/SIASUS/200801_/Dados/PA{state.upper()}{format_year(year)}{format_month(month)}*.dbc"
32            for year in years
33            for state in states
34            for month in months
35        ],
36        fetch_sia_rh,
37        db_file=db_file,
38    )

Import PA (Produção Ambulatorial) from SIASUS (Sistema de Informações Ambulatorial do SUS).

Arguments:
  • db_file (str, optional): path to the duckdb file in which the data will be imported to. Defaults to "datasus.db".
  • years (list, optional): list of years for which data will be imported (if available). Eg: [2012, 2000, 2010]. Defaults to ["*"].
  • states (list, optional): list of brazilian 2 letters state for which data will be imported (if available). Eg: ["SP", "RJ"]. Defaults to ["*"].
  • months (list, optional): list of months numbers (1-12) for which data will be imported (if available). Eg: [1, 12, 6]. Defaults to ["*"].

Extra:

def fetch_sia_rh(ftp_path: str):
41def fetch_sia_rh(ftp_path: str):
42    df = fetch_dbc_as_df(ftp_path)
43    return {MAIN_TABLE: map_sia_pa(df)}
def map_sia_pa(df: polars.dataframe.frame.DataFrame):
 46def map_sia_pa(df: pl.DataFrame):
 47    df = df.with_columns(
 48        pl.when(pl.col(pl.Utf8).str.len_chars() == 0)
 49        .then(None)
 50        .otherwise(pl.col(pl.Utf8))
 51        .name.keep(),
 52    )
 53
 54    return to_schema(
 55        df,
 56        [
 57            Column("PA_CODUNI", pl.Utf8),
 58            Column("PA_GESTAO", pl.Utf8),
 59            Column("PA_CONDIC", pl.Utf8),
 60            Column("PA_UFMUN", pl.Utf8),
 61            Column("PA_REGCT", pl.Utf8),
 62            Column("PA_INCOUT", pl.Utf8),
 63            Column("PA_INCURG", pl.Utf8),
 64            Column("PA_TPUPS", pl.Utf8),
 65            Column("PA_TIPPRE", pl.Utf8),
 66            Column("PA_MN_IND", pl.Utf8),
 67            Column("PA_CNPJCPF", pl.Utf8),
 68            Column("PA_CNPJMNT", pl.Utf8),
 69            Column("PA_CNPJ_CC", pl.Utf8),
 70            Column("PA_MVM", pl.Utf8),
 71            Column("PA_CMP", pl.Utf8),
 72            Column("PA_PROC_ID", pl.Utf8),
 73            Column("PA_TPFIN", pl.Utf8),
 74            Column("PA_SUBFIN", pl.Utf8),
 75            Column("PA_NIVCPL", pl.Utf8),
 76            Column("PA_DOCORIG", pl.Utf8),
 77            Column("PA_AUTORIZ", pl.Utf8),
 78            Column("PA_CNSMED", pl.Utf8),
 79            Column("PA_CBOCOD", pl.Utf8),
 80            Column("PA_MOTSAI", pl.Utf8),
 81            Column("PA_OBITO", pl.Utf8),
 82            Column("PA_ENCERR", pl.Utf8),
 83            Column("PA_PERMAN", pl.Utf8),
 84            Column("PA_ALTA", pl.Utf8),
 85            Column("PA_TRANSF", pl.Utf8),
 86            Column("PA_CIDPRI", pl.Utf8),
 87            Column("PA_CIDSEC", pl.Utf8),
 88            Column("PA_CIDCAS", pl.Utf8),
 89            Column("PA_CATEND", pl.Utf8),
 90            Column("PA_IDADE", pl.Utf8),
 91            Column("IDADEMIN", pl.Utf8),
 92            Column("IDADEMAX", pl.Utf8),
 93            Column("PA_FLIDADE", pl.Utf8),
 94            Column("PA_SEXO", pl.Utf8),
 95            Column("PA_RACACOR", pl.Utf8),
 96            Column("PA_MUNPCN", pl.Utf8),
 97            Column("PA_QTDPRO", pl.Utf8),
 98            Column("PA_QTDAPR", pl.Utf8),
 99            Column("PA_VALPRO", pl.Utf8),
100            Column("PA_VALAPR", pl.Utf8),
101            Column("PA_UFDIF", pl.Utf8),
102            Column("PA_MNDIF", pl.Utf8),
103            Column("PA_DIF_VAL", pl.Utf8),
104            Column("NU_VPA_TOT", pl.Utf8),
105            Column("NU_PA_TOT", pl.Utf8),
106            Column("PA_INDICA", pl.Utf8),
107            Column("PA_CODOCO", pl.Utf8),
108            Column("PA_FLQT", pl.Utf8),
109            Column("PA_FLER", pl.Utf8),
110            Column("PA_ETNIA", pl.Utf8),
111            Column("PA_VL_CF", pl.Float64),
112            Column("PA_VL_CL", pl.Float64),
113            Column("PA_VL_INC", pl.Float64),
114            Column("PA_SRV_C", pl.Utf8),
115            Column("PA_INE", pl.Utf8),
116            Column("PA_NAT_JUR", pl.Utf8),
117        ],
118    )