avwx.parsing.sanitization.taf

TAF sanitization support.

 1"""TAF sanitization support."""
 2
 3# module
 4from avwx.parsing.sanitization.base import sanitize_list_with, sanitize_string_with
 5from avwx.parsing.sanitization.cleaners.base import CleanerListType
 6from avwx.parsing.sanitization.cleaners.cleaners import OnlySlashes, TrimWxCode
 7from avwx.parsing.sanitization.cleaners.joined import (
 8    JoinedCloud,
 9    JoinedMinMaxTemperature,
10    JoinedTafNewLine,
11    JoinedTimestamp,
12    JoinedWind,
13)
14from avwx.parsing.sanitization.cleaners.remove import RemoveFromTaf, RemoveTafAmend
15from avwx.parsing.sanitization.cleaners.replace import CURRENT, ReplaceItem
16from avwx.parsing.sanitization.cleaners.separated import (
17    SeparatedAltimeterLetter,
18    SeparatedCloudAltitude,
19    SeparatedCloudQualifier,
20    SeparatedDistance,
21    SeparatedFirstTemperature,
22    SeparatedMinMaxTemperaturePrefix,
23    SeparatedSecondTemperature,
24    SeparatedTafTimePrefix,
25    SeparatedTemperatureTrailingDigit,
26    SeparatedWindUnit,
27)
28from avwx.parsing.sanitization.cleaners.visibility import VisibilityGreaterThan
29from avwx.parsing.sanitization.cleaners.wind import (
30    DoubleGust,
31    EmptyWind,
32    MisplaceWindKT,
33    NonGGust,
34    RemoveVrbLeadingDigits,
35    WindLeadingMistype,
36)
37
# Replacement pairs that apply only to TAF reports. Each key is a literal
# substring and each value is the text that takes its place (presumably via
# the string sanitizer built below -- confirm against sanitize_string_with).
_TAF_ONLY_REPL = {
    # Slash left hanging after a "Z"
    "Z/ ": "Z ",
    # PROB prefix typos
    " PROBB": " PROB",
    " PROBN": " PROB",
    # NOTE(review): unlike every neighbouring entry, this value has no leading
    # space, so the result is fused onto the preceding item. Confirm whether
    # that is intentional before changing it.
    " PROB3P": "PROB30",
    # TN / TX prefix typos
    " TMM": " TNM",
    " TMN": " TNM",
    " TXN": " TXM",
    " TNTN": " TN",
    " TXTX": " TX",
    " TXX": " TX",
}

# The shared table is unpacked first, so a TAF-only key would override a
# shared key of the same name while keeping the shared key's position.
TAF_REPL = {**CURRENT, **_TAF_ONLY_REPL}


# String-level TAF sanitizer built from the combined replacement table.
clean_taf_string = sanitize_string_with(TAF_REPL)
54
55
# Cleaner classes used for list-level TAF sanitization. The sequence is
# significant: do not reorder entries when regrouping. Group headings name the
# module each run of classes is imported from.
CLEANERS: CleanerListType = [
    # cleaners.cleaners and cleaners.wind
    OnlySlashes,
    EmptyWind,
    TrimWxCode,
    # cleaners.separated
    SeparatedDistance,
    SeparatedFirstTemperature,
    SeparatedCloudAltitude,
    SeparatedSecondTemperature,
    SeparatedAltimeterLetter,
    SeparatedTemperatureTrailingDigit,
    SeparatedWindUnit,
    SeparatedCloudQualifier,
    SeparatedTafTimePrefix,
    SeparatedMinMaxTemperaturePrefix,
    # cleaners.remove and cleaners.replace
    RemoveFromTaf,
    ReplaceItem,
    RemoveTafAmend,
    # cleaners.visibility
    VisibilityGreaterThan,
    # cleaners.wind
    MisplaceWindKT,
    DoubleGust,
    WindLeadingMistype,
    NonGGust,
    RemoveVrbLeadingDigits,
    # cleaners.joined
    JoinedCloud,
    JoinedTimestamp,
    JoinedWind,
    JoinedTafNewLine,
    JoinedMinMaxTemperature,
    # NOTE: other wind fixes are not listed here; the list sanitizer applies
    # them in its own pass (sanitize_wind / is_wind) after these cleaners.
]

# List-level TAF sanitizer built from the cleaner sequence above.
clean_taf_list = sanitize_list_with(CLEANERS)
TAF_REPL = {'!': '1', '@': '2', '#': '3', '%': '5', '^': '6', '&': '7', '*': '8', '?': ' ', '"': '', "'": '', '`': '', '.': '', '(': ' ', ')': ' ', ';': ' ', 'MISSINGKT': '', ' 0I0': ' 090', 'NOSIGKT ': 'KT NOSIG ', 'KNOSIGT ': 'KT NOSIG ', '/VRB': ' VRB', 'CALMKT ': 'CALM ', 'CLMKT ': 'CALM ', 'CLRKT ': 'CALM ', ' <1/': ' M1/', '/04SM': '/4SM', '/4SSM': '/4SM', '/08SM': '/8SM', ' /34SM': '3/4SM', ' 3/SM': ' 3/4SM', 'PQ6SM ': 'P6SM ', 'P6000F ': 'P6000FT ', 'P6000FTQ ': 'P6000FT ', ' C A V O K ': ' CAVOK ', 'N0SIG': 'NOSIG', 'SCATTERED': 'SCT', 'BROKEN': 'BKN', 'OVERCAST': 'OVC', 'Z/ ': 'Z ', ' PROBB': ' PROB', ' PROBN': ' PROB', ' PROB3P': 'PROB30', ' TMM': ' TNM', ' TMN': ' TNM', ' TXN': ' TXM', ' TNTN': ' TN', ' TXTX': ' TX', ' TXX': ' TX'}
def clean_taf_string(text: str, sans: avwx.structs.Sanitization) -> str:
25    def sanitize_report_string(text: str, sans: Sanitization) -> str:
26        """Provide sanitization for operations that work better when the report is a string."""
27        text = text.strip().upper().rstrip("=")
28        if len(text) < 4:
29            return text
30        # Standardize whitespace
31        text = " ".join(text.split())
32        # Prevent changes to station ID
33        stid, text = text[:4], text[4:]
34        # Replace invalid key-value pairs
35        for key, rep in replacements.items():
36            if key in text:
37                text = text.replace(key, rep)
38                sans.log(key, rep)
39        separated = separate_cloud_layers(text)
40        if text != separated:
41            sans.extra_spaces_needed = True
42        return stid + separated

Provide sanitization for operations that work better when the report is a string.

CLEANERS: list[type[avwx.parsing.sanitization.cleaners.base.CleanItem] | type[avwx.parsing.sanitization.cleaners.base.CleanPair] | type[avwx.parsing.sanitization.cleaners.base.RemoveItem] | type[avwx.parsing.sanitization.cleaners.base.SplitItem] | type[avwx.parsing.sanitization.cleaners.base.CombineItems]] = [<class 'avwx.parsing.sanitization.cleaners.cleaners.OnlySlashes'>, <class 'avwx.parsing.sanitization.cleaners.wind.EmptyWind'>, <class 'avwx.parsing.sanitization.cleaners.cleaners.TrimWxCode'>, <class 'avwx.parsing.sanitization.cleaners.separated.SeparatedDistance'>, <class 'avwx.parsing.sanitization.cleaners.separated.SeparatedFirstTemperature'>, <class 'avwx.parsing.sanitization.cleaners.separated.SeparatedCloudAltitude'>, <class 'avwx.parsing.sanitization.cleaners.separated.SeparatedSecondTemperature'>, <class 'avwx.parsing.sanitization.cleaners.separated.SeparatedAltimeterLetter'>, <class 'avwx.parsing.sanitization.cleaners.separated.SeparatedTemperatureTrailingDigit'>, <class 'avwx.parsing.sanitization.cleaners.separated.SeparatedWindUnit'>, <class 'avwx.parsing.sanitization.cleaners.separated.SeparatedCloudQualifier'>, <class 'avwx.parsing.sanitization.cleaners.separated.SeparatedTafTimePrefix'>, <class 'avwx.parsing.sanitization.cleaners.separated.SeparatedMinMaxTemperaturePrefix'>, <class 'avwx.parsing.sanitization.cleaners.remove.remove_items_in.<locals>.RemoveInList'>, <class 'avwx.parsing.sanitization.cleaners.replace.ReplaceItem'>, <class 'avwx.parsing.sanitization.cleaners.remove.RemoveTafAmend'>, <class 'avwx.parsing.sanitization.cleaners.visibility.VisibilityGreaterThan'>, <class 'avwx.parsing.sanitization.cleaners.wind.MisplaceWindKT'>, <class 'avwx.parsing.sanitization.cleaners.wind.DoubleGust'>, <class 'avwx.parsing.sanitization.cleaners.wind.WindLeadingMistype'>, <class 'avwx.parsing.sanitization.cleaners.wind.NonGGust'>, <class 'avwx.parsing.sanitization.cleaners.wind.RemoveVrbLeadingDigits'>, <class 
'avwx.parsing.sanitization.cleaners.joined.JoinedCloud'>, <class 'avwx.parsing.sanitization.cleaners.joined.JoinedTimestamp'>, <class 'avwx.parsing.sanitization.cleaners.joined.JoinedWind'>, <class 'avwx.parsing.sanitization.cleaners.joined.JoinedTafNewLine'>, <class 'avwx.parsing.sanitization.cleaners.joined.JoinedMinMaxTemperature'>]
def clean_taf_list(wxdata: list[str], sans: avwx.structs.Sanitization) -> list[str]:
 53    def sanitize_report_list(wxdata: list[str], sans: Sanitization) -> list[str]:
 54        """Provide sanitization for operations that work better when the report is a list."""
 55        for i, item in reversed(list(enumerate(wxdata))):
 56            for cleaner in _cleaners:
 57                # TODO: Py3.10 change to match/case on type
 58                if isinstance(cleaner, CombineItems):
 59                    if i and cleaner.can_handle(wxdata[i - 1], item):
 60                        wxdata[i - 1] += wxdata.pop(i)
 61                        sans.extra_spaces_found = True
 62                        if cleaner.should_break:
 63                            break
 64                elif isinstance(cleaner, SplitItem):
 65                    if index := cleaner.split_at(item):
 66                        wxdata.insert(i + 1, item[index:])
 67                        wxdata[i] = item[:index]
 68                        sans.extra_spaces_needed = True
 69                        if cleaner.should_break:
 70                            break
 71                elif isinstance(cleaner, CleanPair):
 72                    if i and cleaner.can_handle(wxdata[i - 1], item):
 73                        clean_first, clean_second = cleaner.clean(wxdata[i - 1], item)
 74                        if wxdata[i - 1] != clean_first:
 75                            sans.log(wxdata[i - 1], clean_first)
 76                            wxdata[i - 1] = clean_first
 77                        if item != clean_second:
 78                            sans.log(item, clean_second)
 79                            wxdata[i] = clean_second
 80                            break
 81                elif cleaner.can_handle(item):
 82                    if isinstance(cleaner, RemoveItem):
 83                        sans.log(wxdata.pop(i))
 84                    elif isinstance(cleaner, CleanItem):
 85                        cleaned = cleaner.clean(item)
 86                        wxdata[i] = cleaned
 87                        sans.log(item, cleaned)
 88                    if cleaner.should_break:
 89                        break
 90
 91        # TODO: Replace with above syntax after testing?
 92        # May wish to keep since some elements could be checked after space needed...but so could the others?
 93
 94        # Check for wind sanitization
 95        for i, item in enumerate(wxdata):
 96            # Skip Station
 97            if i == 0:
 98                continue
 99            if is_variable_wind_direction(item):
100                replaced = item[:7]
101                wxdata[i] = replaced
102                sans.log(item, replaced)
103                continue
104            possible_wind = sanitize_wind(item)
105            if is_wind(possible_wind):
106                if item != possible_wind:
107                    sans.log(item, possible_wind)
108                wxdata[i] = possible_wind
109
110        # Strip extra characters before dedupe
111        stripped = [i.strip("./\\") for i in wxdata]
112        if wxdata != stripped:
113            sans.log_list(wxdata, stripped)
114        deduped = dedupe(stripped, only_neighbors=True)
115        if len(deduped) != len(wxdata):
116            sans.duplicates_found = True
117        return deduped

Provide sanitization for operations that work better when the report is a list.