datasus_db.datasources.auxiliar

  1import logging
  2import polars as pl
  3from ..pl_utils import fill_empty, Column, to_schema
  4from ..ftp import fetch_from_zip
  5from ..cnv import to_dataframe
  6from ..datasus import import_from_ftp
  7from ..dbf import read_as_df
  8
  9
# Names of the auxiliary tables. In this module they are used as the keys of the
# dicts returned by the fetch_* functions and as the table list given to
# import_from_ftp.
MUNICIPIO_TABLE = "AUX_MUNICIPIO"
UF_TABLE = "AUX_UF"
CID10_DOENCA_TABLE = "AUX_CID10_DOENCA"
 13
 14
 15def import_auxiliar_tables(db_file="datasus.db"):
 16    """Import auxiliar tables with some datasus codes definitions (eg: municipios, doenças, ...)
 17
 18    Args:
 19        db_file (str, optional): path to the duckdb file in which the data will be imported to.
 20
 21    ---
 22
 23    Extra:
 24    - **Municipio data description**: https://github.com/mymatsubara/datasus-db/blob/main/docs/auxiliar/municipio.pdf
 25    - **ftp path**: ftp.datasus.gov.br/dissemin/publicos/SIM/CID10/DOCS/Docs_Tabs_CID10.zip
 26    """
 27    logging.info(f"⏳ [AUX_TABLES] Starting import...")
 28
 29    import_from_ftp(
 30        [CID10_DOENCA_TABLE, MUNICIPIO_TABLE, UF_TABLE],
 31        ["/dissemin/publicos/SIM/CID10/DOCS/Docs_Tabs_CID10.zip*"],
 32        fetch_sim_auxiliar,
 33        db_file=db_file,
 34    )
 35
 36
 37def fetch_sim_auxiliar(ftp_path: str):
 38    cid10_file = "TABELAS/CID10.DBF"
 39    municipio_file = "TABELAS/CADMUN.DBF"
 40    uf_file = "TABELAS/TABUF.DBF"
 41    files = fetch_from_zip(ftp_path, [cid10_file, municipio_file, uf_file])
 42
 43    cid10_df = read_as_df(cid10_file, files[cid10_file], encoding="cp850")
 44    municipio_df = read_as_df(municipio_file, files[municipio_file], encoding="cp850")
 45    uf_df = read_as_df(uf_file, files[uf_file], encoding="cp850")
 46
 47    return {
 48        CID10_DOENCA_TABLE: map_cid10(cid10_df),
 49        MUNICIPIO_TABLE: map_municipio(municipio_df),
 50        UF_TABLE: map_uf(uf_df),
 51    }
 52
 53
 54def map_cid10(df: pl.DataFrame):
 55    df = df.with_columns(fill_empty(None))
 56
 57    return to_schema(
 58        df,
 59        [
 60            Column("CID10", pl.Utf8),
 61            Column("OPC", pl.Utf8),
 62            Column("CAT", pl.Utf8),
 63            Column("SUBCAT", pl.Utf8),
 64            Column("DESCR", pl.Utf8),
 65            Column("RESTRSEXO", pl.UInt8),
 66        ],
 67    )
 68
 69
 70def map_municipio(df: pl.DataFrame):
 71    df = df.with_columns(fill_empty(None))
 72
 73    return to_schema(
 74        df,
 75        [
 76            Column("MUNCOD", pl.UInt32),
 77            Column("MUNCODDV", pl.UInt32),
 78            Column("SITUACAO", pl.Utf8),
 79            Column("MUNSINP", pl.UInt32),
 80            Column("MUNSIAFI", pl.UInt32),
 81            Column("MUNNOME", pl.Utf8),
 82            Column("MUNNOMEX", pl.Utf8),
 83            Column("OBSERV", pl.Utf8),
 84            Column("MUNSINON", pl.Utf8),
 85            Column("MUNSINONDV", pl.Utf8),
 86            Column("AMAZONIA", pl.Utf8),
 87            Column("FRONTEIRA", pl.Utf8),
 88            Column("CAPITAL", pl.Utf8),
 89            Column("UFCOD", pl.UInt8),
 90            Column("MESOCOD", pl.UInt16),
 91            Column("MICROCOD", pl.UInt16),
 92            Column("MSAUDCOD", pl.UInt16),
 93            Column("RSAUDCOD", pl.UInt16),
 94            Column("CSAUDCOD", pl.UInt16),
 95            Column("RMETRCOD", pl.UInt16),
 96            Column("AGLCOD", pl.UInt16),
 97            Column("ANOINST", pl.UInt16),
 98            Column("ANOEXT", pl.UInt16),
 99            Column("SUCESSOR", pl.UInt32),
100            Column("LATITUDE", pl.Float64),
101            Column("LONGITUDE", pl.Float64),
102            Column("ALTITUDE", pl.Float64),
103            Column("AREA", pl.Float64),
104        ],
105    )
106
107
def map_uf(df: pl.DataFrame):
    """Convert the UF frame with `to_schema` (SIGLA_UF and DESCRICAO as strings, CODIGO as UInt8)."""
    # NOTE(review): unlike map_cid10 and map_municipio, no fill_empty(None)
    # pass is applied here — confirm this is intentional.
    uf_layout = (
        ("SIGLA_UF", pl.Utf8),
        ("CODIGO", pl.UInt8),
        ("DESCRICAO", pl.Utf8),
    )
    schema = [Column(name, dtype) for name, dtype in uf_layout]
    return to_schema(df, schema)
117
118
def fetch_painel_oncologia_auxiliar(ftp_path: str):
    """Build the municipio and UF auxiliary tables from the CNV files in the zip at `ftp_path`.

    Args:
        ftp_path (str): location handed to `fetch_from_zip`.

    Returns:
        dict mapping `MUNICIPIO_TABLE` and `UF_TABLE` to the dataframes produced
        by `to_dataframe`, with the first space-separated token of `NOME` removed.
    """
    municipio_file = "CNV/br_municip.cnv"
    uf_file = "CNV/br_uf.cnv"

    files = fetch_from_zip(ftp_path, [municipio_file, uf_file])

    # Named `_cnv_to_df` so it does not shadow the module-level `read_as_df`
    # imported from `..dbf`. The dtype is annotated generically because the
    # helper is called with both pl.UInt32 and pl.UInt8.
    def _cnv_to_df(file_name: str, id_dtype: "type[pl.DataType]"):
        cnv_bytes = files[file_name]
        df = to_dataframe(cnv_bytes, id_dtype=id_dtype)
        # Drop the first space-separated token of NOME and re-join the rest
        # (presumably a leading code prefix — confirm against the CNV files).
        return df.with_columns(
            pl.col("NOME").str.split(" ").list.slice(1).list.join(" ")
        )

    return {
        MUNICIPIO_TABLE: _cnv_to_df(municipio_file, id_dtype=pl.UInt32),
        UF_TABLE: _cnv_to_df(uf_file, id_dtype=pl.UInt8),
    }
# Names of the auxiliary tables. In this module they are used as the keys of the
# dicts returned by the fetch_* functions and as the table list given to
# import_from_ftp.
MUNICIPIO_TABLE = 'AUX_MUNICIPIO'
UF_TABLE = 'AUX_UF'
CID10_DOENCA_TABLE = 'AUX_CID10_DOENCA'
def import_auxiliar_tables(db_file='datasus.db'):
def import_auxiliar_tables(db_file="datasus.db"):
    """Import the auxiliary tables holding DATASUS code definitions (e.g. municípios, doenças, ...).

    Calls `import_from_ftp` with the CID10, municipio and UF table names, the
    FTP pattern of the `Docs_Tabs_CID10.zip` archive and `fetch_sim_auxiliar`
    as the fetch function.

    Args:
        db_file (str, optional): path to the duckdb file into which the data will be imported.

    ---

    Extra:
    - **Municipio data description**: https://github.com/mymatsubara/datasus-db/blob/main/docs/auxiliar/municipio.pdf
    - **ftp path**: ftp.datasus.gov.br/dissemin/publicos/SIM/CID10/DOCS/Docs_Tabs_CID10.zip
    """
    # Plain literal: the message has no placeholders, so no f-string is needed.
    logging.info("⏳ [AUX_TABLES] Starting import...")

    import_from_ftp(
        [CID10_DOENCA_TABLE, MUNICIPIO_TABLE, UF_TABLE],
        # The trailing "*" is part of the pattern handed to import_from_ftp.
        ["/dissemin/publicos/SIM/CID10/DOCS/Docs_Tabs_CID10.zip*"],
        fetch_sim_auxiliar,
        db_file=db_file,
    )

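Import auxiliary tables with some DATASUS code definitions (e.g. municípios, doenças, ...)

Arguments:
  • db_file (str, optional): path to the duckdb file into which the data will be imported.

Extra:
  • Municipio data description: https://github.com/mymatsubara/datasus-db/blob/main/docs/auxiliar/municipio.pdf
  • ftp path: ftp.datasus.gov.br/dissemin/publicos/SIM/CID10/DOCS/Docs_Tabs_CID10.zip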

def fetch_sim_auxiliar(ftp_path: str):
def fetch_sim_auxiliar(ftp_path: str):
    """Read the SIM auxiliary DBF files from the zip at `ftp_path` and map them to tables.

    Returns a dict keyed by `CID10_DOENCA_TABLE`, `MUNICIPIO_TABLE` and
    `UF_TABLE` whose values are the results of the matching `map_*` function.
    """
    cid10_member = "TABELAS/CID10.DBF"
    municipio_member = "TABELAS/CADMUN.DBF"
    uf_member = "TABELAS/TABUF.DBF"
    archive = fetch_from_zip(ftp_path, [cid10_member, municipio_member, uf_member])

    def load(member: str):
        # The three DBF files read here are all decoded as cp850.
        return read_as_df(member, archive[member], encoding="cp850")

    raw_cid10 = load(cid10_member)
    raw_municipio = load(municipio_member)
    raw_uf = load(uf_member)

    tables = {}
    tables[CID10_DOENCA_TABLE] = map_cid10(raw_cid10)
    tables[MUNICIPIO_TABLE] = map_municipio(raw_municipio)
    tables[UF_TABLE] = map_uf(raw_uf)
    return tables
def map_cid10(df: polars.dataframe.frame.DataFrame):
def map_cid10(df: pl.DataFrame):
    """Apply `fill_empty(None)` to the CID10 frame, then convert it with `to_schema`.

    The target schema has five string columns (CID10, OPC, CAT, SUBCAT, DESCR)
    and one UInt8 column (RESTRSEXO).
    """
    cleaned = df.with_columns(fill_empty(None))

    text_columns = [Column(name, pl.Utf8) for name in ("CID10", "OPC", "CAT", "SUBCAT", "DESCR")]
    schema = text_columns + [Column("RESTRSEXO", pl.UInt8)]

    return to_schema(cleaned, schema)
def map_municipio(df: polars.dataframe.frame.DataFrame):
 71def map_municipio(df: pl.DataFrame):
 72    df = df.with_columns(fill_empty(None))
 73
 74    return to_schema(
 75        df,
 76        [
 77            Column("MUNCOD", pl.UInt32),
 78            Column("MUNCODDV", pl.UInt32),
 79            Column("SITUACAO", pl.Utf8),
 80            Column("MUNSINP", pl.UInt32),
 81            Column("MUNSIAFI", pl.UInt32),
 82            Column("MUNNOME", pl.Utf8),
 83            Column("MUNNOMEX", pl.Utf8),
 84            Column("OBSERV", pl.Utf8),
 85            Column("MUNSINON", pl.Utf8),
 86            Column("MUNSINONDV", pl.Utf8),
 87            Column("AMAZONIA", pl.Utf8),
 88            Column("FRONTEIRA", pl.Utf8),
 89            Column("CAPITAL", pl.Utf8),
 90            Column("UFCOD", pl.UInt8),
 91            Column("MESOCOD", pl.UInt16),
 92            Column("MICROCOD", pl.UInt16),
 93            Column("MSAUDCOD", pl.UInt16),
 94            Column("RSAUDCOD", pl.UInt16),
 95            Column("CSAUDCOD", pl.UInt16),
 96            Column("RMETRCOD", pl.UInt16),
 97            Column("AGLCOD", pl.UInt16),
 98            Column("ANOINST", pl.UInt16),
 99            Column("ANOEXT", pl.UInt16),
100            Column("SUCESSOR", pl.UInt32),
101            Column("LATITUDE", pl.Float64),
102            Column("LONGITUDE", pl.Float64),
103            Column("ALTITUDE", pl.Float64),
104            Column("AREA", pl.Float64),
105        ],
106    )
def map_uf(df: polars.dataframe.frame.DataFrame):
def map_uf(df: pl.DataFrame):
    """Convert the UF frame with `to_schema` (SIGLA_UF and DESCRICAO as strings, CODIGO as UInt8)."""
    # NOTE(review): unlike map_cid10 and map_municipio, no fill_empty(None)
    # pass is applied here — confirm this is intentional.
    uf_layout = (
        ("SIGLA_UF", pl.Utf8),
        ("CODIGO", pl.UInt8),
        ("DESCRICAO", pl.Utf8),
    )
    schema = [Column(name, dtype) for name, dtype in uf_layout]
    return to_schema(df, schema)
def fetch_painel_oncologia_auxiliar(ftp_path: str):
def fetch_painel_oncologia_auxiliar(ftp_path: str):
    """Build the municipio and UF auxiliary tables from the CNV files in the zip at `ftp_path`.

    Args:
        ftp_path (str): location handed to `fetch_from_zip`.

    Returns:
        dict mapping `MUNICIPIO_TABLE` and `UF_TABLE` to the dataframes produced
        by `to_dataframe`, with the first space-separated token of `NOME` removed.
    """
    municipio_file = "CNV/br_municip.cnv"
    uf_file = "CNV/br_uf.cnv"

    files = fetch_from_zip(ftp_path, [municipio_file, uf_file])

    # Named `_cnv_to_df` so it does not shadow the module-level `read_as_df`
    # imported from `..dbf`. The dtype is annotated generically because the
    # helper is called with both pl.UInt32 and pl.UInt8.
    def _cnv_to_df(file_name: str, id_dtype: "type[pl.DataType]"):
        cnv_bytes = files[file_name]
        df = to_dataframe(cnv_bytes, id_dtype=id_dtype)
        # Drop the first space-separated token of NOME and re-join the rest
        # (presumably a leading code prefix — confirm against the CNV files).
        return df.with_columns(
            pl.col("NOME").str.split(" ").list.slice(1).list.join(" ")
        )

    return {
        MUNICIPIO_TABLE: _cnv_to_df(municipio_file, id_dtype=pl.UInt32),
        UF_TABLE: _cnv_to_df(uf_file, id_dtype=pl.UInt8),
    }