Extract pattern from sql script - python

I am trying to read either an SQL script or a shell script on a Unix box and generate an output file that lists each statement type (CREATE, DROP, UPDATE, DELETE, MERGE, INSERT) followed by the table name it operates on. I would like to do this in a generic way, so that any such script can be read and the output generated. Can this be achieved with awk?
OUTPUT
MERGE|temp_st_rx_wk_str_ip_rpt
SELECT|rx_ov_ord_excep_str_sku
SELECT|ndc
SELECT|fiscal_week
SELECT|store
SELECT|dss_saf_user01.rx_ov_ord_exclu_str
SELECT|rx_osv_invoice_str_ndc
DROP|temp_extract
CREATE|temp_build_extract
SELECT|temp_st_rx_wk_str_ip_rpt
CODE
merge into temp_st_rx_wk_str_ip_rpt s
USING (SELECT b.week_nbr,
b.store_nbr,
SUM (NVL (a.orig_on_ord_qty, 0)) AS mnd_ov_ord_orig_qty,
SUM (NVL (b.inv_qty, 0)) AS mnd_ov_inv_qty
FROM (SELECT /*+ PARALLEL (s,8) */ w.week_nbr, s.store_nbr, s.ndc_nbr,
SUM (s.orig_on_ord_qty) AS orig_on_ord_qty
FROM rx_ov_ord_excep_str_sku s,
ndc n,
fiscal_week w,
store st
WHERE s.ndc_nbr = n.ndc_nbr
AND s.store_nbr = st.store_nbr
AND s.ord_dt BETWEEN w.start_dt AND w.end_dt
AND n.schd_drug_cd NOT IN (''02'', ''07'')
AND n.gen_brand_ind <> ''Y''
AND s.orig_on_ord_qty < 1000 -- Arbitrary value used to exclude bad data
AND w.week_nbr = &P_WEEK_NBR
AND st.area_nbr NOT IN (0, 10, 11)
AND st.pharm_ind = ''Y''
AND s.store_nbr NOT IN
(SELECT store_nbr
FROM dss_saf_user01.rx_ov_ord_exclu_str
WHERE rx_ov_ord_exclu_cd = ''CP'')
GROUP BY w.week_nbr, s.store_nbr, s.ndc_nbr) a,
(SELECT /*+ INDEX (s,RX_OSV_INVOICE_STR_NDC_PK) */
w.week_nbr, s.store_nbr, s.ndc_nbr,
SUM (s.inv_qty) AS inv_qty
FROM rx_osv_invoice_str_ndc s,
ndc n,
store st,
fiscal_week w
WHERE s.ndc_nbr = n.ndc_nbr
AND s.store_nbr = st.store_nbr
AND s.ord_dt BETWEEN w.start_dt AND w.end_dt
AND s.ord_type_cd <> ''F''
AND n.schd_drug_cd NOT IN (''02'', ''07'')
AND n.gen_brand_ind <> ''Y''
AND s.inv_qty > 0
AND w.week_nbr = &P_WEEK_NBR
AND st.area_nbr NOT IN (0, 10, 11)
AND st.pharm_ind = ''Y''
AND s.store_nbr NOT IN
(SELECT store_nbr
FROM dss_saf_user01.rx_ov_ord_exclu_str
WHERE rx_ov_ord_exclu_cd = ''CP'')
GROUP BY w.week_nbr, s.store_nbr, s.ndc_nbr) b
WHERE a.week_nbr (+) = b.week_nbr
AND a.store_nbr (+) = b.store_nbr
AND a.ndc_nbr (+) = b.ndc_nbr
GROUP BY b.week_nbr, b.store_nbr) t
ON (t.week_nbr = s.week_nbr
AND t.store_nbr = s.store_nbr)
WHEN NOT MATCHED
THEN
INSERT (week_nbr, store_nbr, mnd_ov_ord_orig_qty, mnd_ov_inv_qty)
VALUES (t.week_nbr, t.store_nbr, t.mnd_ov_ord_orig_qty, t.mnd_ov_inv_qty)
WHEN MATCHED
THEN
UPDATE SET
s.mnd_ov_ord_orig_qty = t.mnd_ov_ord_orig_qty,
s.mnd_ov_inv_qty = t.mnd_ov_inv_qty';
commit;
drop table temp_extract;
create table temp_build_extract as select * from temp_st_rx_wk_Str_ip_rpt;

You can try:
awk -f e.awk input.txt
where input.txt is your input file (CODE), and e.awk is:
# e.awk - print "STATEMENT|table" pairs found in an SQL script.
#
# Requires GNU awk: the three-argument form of match() (capture groups
# stored in an array) is a gawk extension.
#
# Globals:
#   from - text of the FROM clause currently being examined
#   T    - set of table names already reported as SELECT sources

# MERGE INTO <table> ...
/^merge / {
    if (match($0, /merge into ([^[:blank:]]+)/, a)) {
        print "MERGE|" a[1]
        next
    }
}

# FROM <table> [alias], <table> [alias], ...   (sub-selects "FROM (" are skipped)
/FROM [^(]/ {
    getFromTabs()
    # First table follows the FROM keyword directly.
    if (match(from, /FROM ([^[:space:],;)]+)/, a)) {
        printKey(a[1])
        # Every further table follows a comma.
        do {
            ind = index(from, ",")
            if (ind) {
                from = substr(from, ind + 1)
                # Table names may contain '_', '.', '$' and '#', not only
                # alphanumerics (e.g. fiscal_week, schema.table).
                if (match(from, /[[:space:]]*([[:alnum:]_.$#]+)/, a))
                    printKey(a[1])
            }
        } while (ind)
    }
}

# DROP TABLE <table>;
/^drop/ {
    # Exclude ';' so the statement terminator is not reported as part of the name.
    if (match($0, /drop table ([^[:blank:];]+)/, a)) {
        print "DROP|" a[1]
        next
    }
}

# CREATE TABLE <table> [AS SELECT ... <source>;]
/^create/ {
    if (match($0, /create table ([^[:blank:];]+)/, a)) {
        print "CREATE|" a[1]
    }
    # The last blank-separated word before ';' is taken as the source table.
    if (match($0, /select.*[[:blank:]]([^[:blank:]]+);/, a)) {
        print "SELECT|" a[1]
    }
}

# Print "SELECT|key" the first time a given table name is seen.
function printKey(key) {
    if (!(key in T)) {
        print "SELECT|" key
        T[key]++
    }
}

# Collect the current line and the following lines, up to (not including)
# the next line containing WHERE, into the global "from".
# p is a local counter; callers pass no arguments.
function getFromTabs(p) {
    p = 0
    from = ""
    do {
        from = (p++ == 0) ? $0 : (from ORS $0)
        # Stop at end of input (or on a read error) instead of looping forever
        # when no WHERE line follows the FROM clause.
        if ((getline) <= 0)
            break
    } while (!/WHERE/)
}
For your sample code above this produces output:
MERGE|temp_st_rx_wk_str_ip_rpt
SELECT|rx_ov_ord_excep_str_sku
SELECT|ndc
SELECT|fiscal
SELECT|store
SELECT|dss_saf_user01.rx_ov_ord_exclu_str
SELECT|rx_osv_invoice_str_ndc
DROP|temp_extract;
CREATE|temp_build_extract
SELECT|temp_st_rx_wk_Str_ip_rpt
(Note that I know nothing about SQL, so you must check if this looks ok to you.)

Related

Discover relationship between the entities

I have a dataset like below -
List((X,Set(" 1", " 7")), (Z,Set(" 5")), (D,Set(" 2")), (E,Set(" 8")), ("F ",Set(" 5", " 9", " 108")), (G,Set(" 2", " 11")), (A,Set(" 7", " 5")), (M,Set(108)))
Here X is related to A as 7 is common between them
Z is related to A as 5 is common between them
F is related to A as 5 is common between them
M is related to F as 108 is common between them
So, X, Z, A, F and M are related
D and G are related as 2 is common between them
E is not related to anybody
So, the output would be ((X, Z, A, F, M), (D,G), (E))
Order doesn't matter here.
I have used Scala here, but solution in Scala/Python or a pseudocode would work for me.
Build an undirected graph where each label is connected to each number from the corresponding set (i.e. (A, { 1, 2 }) would give two edges: A <-> 1 and A <-> 2)
Compute the connected components (using depth-first search, for example).
Filter out only the labels from the connected components.
import util.{Left, Right, Either}
import collection.mutable
/** Groups face labels that are transitively related through shared vertices.
  *
  * A bipartite undirected graph is built with one node per label (`Left`)
  * and one node per vertex (`Right`); each label is linked to every vertex
  * in its set. The labels of each connected component form one group.
  *
  * @param faces pairs of (label, set of vertices attached to that label)
  * @return one list of labels per connected component; the order of the
  *         groups and of the labels inside a group is unspecified
  */
def connectedComponentsOfAsc[F, V](faces: List[(F, Set[V])]): List[List[F]] = {
  type Node = Either[F, V]
  val graphBuilder = mutable.HashMap.empty[Node, mutable.HashSet[Node]]

  def addEdge(a: Node, b: Node): Unit =
    graphBuilder.getOrElseUpdate(a, mutable.HashSet.empty[Node]) += b

  for ((faceLabel, vertices) <- faces) {
    val faceNode: Node = Left(faceLabel)
    // Register the label even when its vertex set is empty, so that it still
    // shows up in the result as a group of its own instead of disappearing.
    graphBuilder.getOrElseUpdate(faceNode, mutable.HashSet.empty[Node])
    for (vertex <- vertices) {
      val vertexNode: Node = Right(vertex)
      addEdge(faceNode, vertexNode)
      addEdge(vertexNode, faceNode)
    }
  }

  val graph: Map[Node, Set[Node]] = graphBuilder.view.mapValues(_.toSet).toMap
  val ccs = connectedComponents(graph)
  // Keep only the label nodes of every component.
  ccs.map(_.collect { case Left(faceLabel) => faceLabel }.toList)
}
/** Computes the connected components of an undirected graph.
  *
  * The traversal uses an explicit stack rather than recursion, so large
  * components cannot overflow the call stack.
  *
  * @param undirectedGraph adjacency map; every edge is expected to be listed
  *                        in both directions. A neighbour that is not itself
  *                        a key is treated as a node without outgoing edges.
  * @return the node sets of the components, in the iteration order of the
  *         map's keys
  */
def connectedComponents[V](undirectedGraph: Map[V, Set[V]]): List[Set[V]] = {
  val visited = mutable.HashSet.empty[V]
  val components = mutable.ListBuffer.empty[Set[V]]

  for (start <- undirectedGraph.keys if !visited(start)) {
    val connectedComponent = mutable.HashSet.empty[V]
    val pending = mutable.Stack(start)
    visited += start
    while (pending.nonEmpty) {
      val curr = pending.pop()
      connectedComponent += curr
      // Mark nodes when they are pushed so each node is stacked only once.
      for (next <- undirectedGraph.getOrElse(curr, Set.empty[V]) if !visited(next)) {
        visited += next
        pending.push(next)
      }
    }
    components += connectedComponent.toSet
  }
  components.toList
}
Can be used like this:
/** Demo entry point: groups the sample data and prints the groups in a
  * deterministic (sorted) order.
  */
@main def main(): Unit = {
  println(connectedComponentsOfAsc(
    List(
      ("X", Set("1", "7")),
      ("Z", Set("5")),
      ("D", Set("2")),
      ("E", Set("8")),
      ("F", Set("5", "9", "108")),
      ("G", Set("2", "11")),
      ("A", Set("7", "5")),
      ("M", Set("108"))
    )
  ).map(_.sorted).sortBy(_.toString))
}
Produces:
List(List(A, F, M, X, Z), List(D, G), List(E))
All steps are O(n) (scales linearly with the size of input).
This answer is self-contained, but using some kind of graph-library would be clearly advantageous here.
Ultimately using a simpler solution in python as below:
from itertools import groupby

# Each entry is [label, set of values attached to that label].
data = [
    ["X", {"1", "7"}],
    ["Z", {"5"}],
    ["D", {"2"}],
    ["E", {"8"}],
    ["F", {"5", "9", "108"}],
    ["G", {"2", "11"}],
    ["A", {"7", "5"}],
    ["M", {"108"}],
]

# Merge the sets of every pair of entries that share a value, and repeat until
# nothing changes any more, so that indirect (transitive) relations are fully
# propagated regardless of the order of the entries.
changed = True
while changed:
    changed = False
    for i in range(len(data)):
        for j in range(i + 1, len(data)):
            left, right = data[i][1], data[j][1]
            if left != right and left & right:
                data[i][1] = data[j][1] = left | right
                changed = True

# Entries of one group now carry identical sets: sort by the (sorted) set
# contents, then by label, and print the labels of each run of equal sets.
keyed = sorted([sorted(d[1]), d[0]] for d in data)
for _, group in groupby(keyed, key=lambda pair: pair[0]):
    print([label for _, label in group])
Getting output as :
['A', 'F', 'M', 'X', 'Z']
['D', 'G']
['E']
Tested for few more datasets and it seems to be working fine.
// I put some values in quotes so we have consistent string input
// Sample input: (label, set of numbers given as strings). Several strings
// carry a leading space, and the label "F " carries a trailing space.
val initialData :List[(String, Set[String])] = List(
("X",Set(" 1", " 7")),
("Z",Set(" 5")),
("D",Set(" 2")),
("E",Set(" 8")),
("F ",Set(" 5", " 9", " 108")),
("G",Set(" 2", " 11")),
("A",Set(" 7", " 5")),
("M",Set("108"))
)
// Clean up the Sets by turning the string data inside the sets into Ints.
// Only the set members are trimmed; the labels are kept as they are.
val cleanedData = initialData.map(elem => (elem._1, elem._2.map(_.trim.toInt)))
> cleanedData: List[(String, scala.collection.immutable.Set[Int])] = List((X,Set(1, 7)), (Z,Set(5)), (D,Set(2)), (E,Set(8)), ("F ",Set(5, 9, 108)), (G,Set(2, 11)), (A,Set(7, 5)), (M,Set(108)))
// Explode the Sets into a list of simple mappings. X -> 1, X -> 7 individually.
// Each (label, set) pair becomes one (label, number) pair per set member.
val explodedList = cleanedData.flatMap(x => x._2.map(v => (x._1, v)))
> explodedList: List[(String, Int)] = List((X,1), (X,7), (Z,5), (D,2), (E,8), ("F ",5), ("F ",9), ("F ",108), (G,2), (G,11), (A,7), (A,5), (M,108))
Group them together by the new key
// Key the (label, number) pairs by their number (the second tuple element).
val mappings = explodedList.groupBy(_._2)
> mappings: scala.collection.immutable.Map[Int,List[(String, Int)]] = Map(5 -> List((Z,5), ("F ",5), (A,5)), 1 -> List((X,1)), 9 -> List(("F ",9)), 2 -> List((D,2), (G,2)), 7 -> List((X,7), (A,7)), 108 -> List(("F ",108), (M,108)), 11 -> List((G,11)), 8 -> List((E,8)))
Print the output
// For every number, print the labels that contain it. This reports direct
// relations per number only; groups are not merged transitively here.
mappings.foreach { case (key, items) =>
println(s"${items.map(_._1).mkString(",")} are all related because of $key")
}
> Z,F ,A are all related because of 5
> X are all related because of 1
> F are all related because of 9
> D,G are all related because of 2
> X,A are all related because of 7
> F ,M are all related because of 108
> G are all related because of 11
> E are all related because of 8
Read input, creating a vector of pairs
e.g.
X 1
X 7
Z 5
...
Sort the vector in order of the second member of the pairs
e.g
X 1
D 2
G 2
...
Iterate over sorted vector, adding to a "pass1 group" so long as the second member does not change. If it does change, start a new pass1 group.
e.g.
X
D G
Z F A
X A
E
F
G
merge pass1 groups with common members to give the output groups.
Here is the C++ code that implements this
#include <algorithm>
#include <cstdlib>
#include <iostream>
#include <string>
#include <utility>
#include <vector>
// Merge group vg into group res when the two share at least one member.
//
// Returns false and leaves res untouched when the groups are disjoint.
// Otherwise appends to res every member of vg that res does not already
// contain (in vg's order) and returns true.
bool merge(
    std::vector<char> &res,
    std::vector<char> &vg)
{
    const bool overlaps =
        std::find_first_of(res.begin(), res.end(), vg.begin(), vg.end()) != res.end();
    if (!overlaps)
        return false;

    for (std::size_t i = 0; i < vg.size(); ++i)
    {
        const char member = vg[i];
        const bool known = std::find(res.begin(), res.end(), member) != res.end();
        if (!known)
            res.push_back(member);
    }
    return true;
}
// Append a copy of group vg to result as a new, separate group.
void add(
    std::vector<std::vector<char>> &result,
    std::vector<char> &vg)
{
    result.emplace_back(vg.begin(), vg.end());
}
// Demo driver:
//   1. parse the textual input into (label, number) pairs,
//   2. sort the pairs by number,
//   3. pass 1: collect the labels that share one number into a group,
//   4. pass 2: repeatedly merge groups that share a label until stable,
//   5. print the resulting groups.
// Labels are read as the single character two positions before each "Set".
int main()
{
    std::string input = "List((X,Set(\" 1\", \" 7\")), (Z,Set(\" 5\")), (D,Set(\" 2\")), (E,Set(\" 8\")), (F,Set(\" 5\", \" 9\", \" 108\")), (G,Set(\" 2\", \" 11\")), (A,Set(\" 7\", \" 5\")), (M,Set(\"108\")))";
    // Alternative test case (a chain A-B-C-D linked pairwise); uncomment to run
    // it instead of the data from the question.
    // input = "List((A,Set(\"0\", \"1\")),(B,Set(\"1\", \"2\")),(C,Set(\"2\", \"3\")),(D,Set(\"3\", \"4\")))";

    // --- parse -----------------------------------------------------------
    std::vector<std::pair<char, int>> vinp;
    const std::string digits = "0123456789";
    std::size_t p = input.find("Set");
    while (p != std::string::npos)
    {
        // The numbers of this entry lie between this "Set" and the next one
        // (or the end of the input for the last entry).
        std::size_t q = input.find("Set", p + 1);
        if (q == std::string::npos)
            q = input.length();

        if (p >= 2)
        {
            const char c = input[p - 2];
            std::size_t s = input.find_first_of(digits, p);
            while (s != std::string::npos && s < q)
            {
                vinp.push_back(std::make_pair(c, std::atoi(input.c_str() + s)));
                // Skip the whole number, however many digits it has.
                const std::size_t e = input.find_first_not_of(digits, s);
                if (e == std::string::npos)
                    break;
                s = input.find_first_of(digits, e);
            }
        }
        p = (q < input.length()) ? q : std::string::npos;
    }

    // --- sort by number --------------------------------------------------
    std::sort(vinp.begin(), vinp.end(),
              [](std::pair<char, int> a, std::pair<char, int> b)
              {
                  return a.second < b.second;
              });
    std::cout << "sorted\n";
    for (auto &p : vinp)
        std::cout << p.first << " " << p.second << "\n";

    // --- pass 1: one group per distinct number -----------------------------
    std::vector<std::vector<char>> vpass1;
    std::vector<char> row;
    int sec = -1;
    for (auto &p : vinp)
    {
        if (p.second != sec)
        {
            // new group
            if (row.size())
                vpass1.push_back(row);
            sec = p.second;
            row.clear();
        }
        row.push_back(p.first);
    }
    // Flush the group of the last (largest) number; without this its labels
    // would be lost.
    if (row.size())
        vpass1.push_back(row);

    std::cout << "\npass1\n";
    for (auto &row : vpass1)
    {
        for (char c : row)
            std::cout << c << " ";
        std::cout << "\n";
    }

    // --- pass 2: merge groups sharing a label until nothing merges ---------
    std::vector<std::vector<char>> result;
    bool fmerge2 = true;
    while (fmerge2)
    {
        fmerge2 = false;
        for (auto &vg : vpass1)
        {
            if (!result.size())
                add(result, vg);
            else
            {
                bool fmerge1 = false;
                for (auto &res : result)
                {
                    if (merge(res, vg))
                    {
                        fmerge1 = true;
                        fmerge2 = true;
                        break;
                    }
                }
                if (!fmerge1)
                    add(result, vg);
            }
        }
        if (fmerge2)
        {
            // Something was merged: feed the merged groups through again,
            // since two result groups may now overlap.
            vpass1 = result;
            result.clear();
        }
    }

    // --- output ------------------------------------------------------------
    std::cout << "\n(";
    for (auto &res : result)
    {
        if (res.size())
        {
            std::cout << "(";
            for (char c : res)
                std::cout << c << " ";
            std::cout << ")";
        }
    }
    std::cout << ")\n";
    return 0;
}
It produces the correct result
((X A Z F M )(D G )(E ))

segmentio/parquet fails to read files written with Julia and/or Python

The code below, which was taken almost verbatim from the segmentio/arrow, fails to read .parquet files written with Python and/or Julia libs. When the code returns from the call to parquet.ReadFile("file") the rows contain 0 values for int64 or "" for strings. The reading fails with codec = {ZSTD, GZIP, or SNAPPY}
// FiRowType is the row type used to read the integer file.
// All three fields start with a lower-case letter, i.e. they are unexported.
type FiRowType struct{ x1, x2, x3 int64 }
// RdFiFile reads "fileName_ZSTD.parquet" into a slice of FiRowType and prints
// every row with %+v. It terminates the program via log.Fatal on a read error.
func RdFiFile() {
rows, err := parquet.ReadFile[FiRowType]("fileName_ZSTD.parquet")
if err != nil {
log.Fatal(err)
}
for _, c := range rows {
fmt.Printf("%+v\n", c)
}
}
// FsRowType is the row type used to read the string file.
// All three fields start with a lower-case letter, i.e. they are unexported.
type FsRowType struct{ x1, x2, x3 string }
// RdFsFile reads "fileName_ZSTD.parquet" into a slice of FsRowType and prints
// every row with %+v. It terminates the program via log.Fatal on a read error.
func RdFsFile() {
rows, err := parquet.ReadFile[FsRowType]("fileName_ZSTD.parquet")
if err != nil {
log.Fatal(err)
}
for _, c := range rows {
fmt.Printf("%+v\n", c)
}
}
The Golang code does not return an error, i.e., err == nil.
The code returns the right number of rows and columns, some info in the metadata seems to be correct (like the originator of the file, date of creation etc). I created the files using Julia:
using Parquet
# Build two 10x3 sample tables (one Int64, one String), print them, and write
# each of them once per compression codec (ZSTD, GZIP, SNAPPY) as
# `fi_<CODEC>.parquet` / `fs_<CODEC>.parquet` under the global `prqDir`.
function WrForGo( )
    nrows = 10
    ncols = 3
    # Arrays of size (10,3): `ai` holds Int64 values and `as` holds Strings.
    ai = Array{Int64, 2}(undef, nrows, ncols)
    as = Array{String, 2}(undef, nrows, ncols)
    for i = 1:nrows
        for j = 1:ncols
            # e.g. "03_02" for row 3, column 2
            as[i,j] = string(i, pad=2) * "_" * string(j, pad=2)
            # column 1 holds 1..10, column 2 holds 11..20, column 3 holds 21..30
            ai[i,j] = (j-1)*10 + i
        end
    end
    dfi = DataFrame(ai, :auto) ; dfs = DataFrame(as, :auto)
    print( dfi ) ; print( dfs )
    # Same files, in the same order, as writing each codec out by hand.
    for codec in ("ZSTD", "GZIP", "SNAPPY")
        Parquet.write_parquet( prqDir * "fi_" * codec * ".parquet", dfi; compression_codec = codec)
        Parquet.write_parquet( prqDir * "fs_" * codec * ".parquet", dfs; compression_codec = codec)
    end
end
In Go, identifiers that start with a lower-case character are not exported, so struct fields named that way cannot be set from other packages (e.g. segmentio/parquet-go). Try the below:
package main
import (
"fmt"
"log"
"github.com/segmentio/parquet-go"
)
// FiRowType is one row of the integer file. The fields are exported
// (upper-case first letter), and each `parquet` struct tag names the column
// it is read from ("x1", "x2", "x3") and marks it as optional.
type FiRowType struct {
X1 int64 `parquet:"x1,optional"`
X2 int64 `parquet:"x2,optional"`
X3 int64 `parquet:"x3,optional"`
}
// RdFiFile loads every row of "fi_ZSTD.parquet" as a FiRowType and prints
// each one, prefixed with its index. A read error ends the program through
// log.Fatal.
func RdFiFile() {
	rows, err := parquet.ReadFile[FiRowType]("fi_ZSTD.parquet")
	if err != nil {
		log.Fatal(err)
	}
	for idx := range rows {
		fmt.Printf("%d %+v\n", idx, rows[idx])
	}
}
// main runs the integer-file reader.
func main() {
RdFiFile()
}

My pandas logic doesn't seem to result what I want, no matter how many tests and changes I make

# NOTE: this is a chained assignment, so 'SO' is assigned to df['col1'] as a
# whole as well as to the selected rows of 'RULE OF NGPC'. Also,
# (df['SC'] < 2) & (df['SC'] > 5) can never be true for any single value.
df['col1'] = df.loc[((df['NGPC PT'] > 1) | ((df['SC'] < 2)& (df['SC'] > 5)) & ((df['NGPC PT'] >4) & (df['NGPC PT'] <7))),'RULE OF NGPC'] ='SO'
Basically, col1 is supposed to take one of two values, OBS or SO: OBS is the standard value for col1, and whenever a row does not meet the requirements below, it changes to SO.
*note the below reqs are in C#:
// Default rule: "OBS" when PT is "5" or "6", otherwise "SO".
d["RULE OF NGPC"] = (v["PT"].Equals("5") || v["PT"].Equals("6")) ? "OBS" : "SO";
// COUNT ensures that at most one of the SC branches below is taken.
int COUNT = 0;
// Override for PT == "1": the rule becomes "OBS" when SC is "2", "3", "4" or
// "5". For any other SC the default above ("SO") is kept.
if(v["PT"].Equals("1"))
{
if ((v["SC"].Equals("2"))&&(COUNT==0))
{
COUNT = COUNT + 1;
// Both conditions were already checked by the enclosing ifs, so this
// always yields "OBS" (the same holds for the three branches below).
d["RULE OF NGPC"] = (v["PT"].Equals("1") && v["SC"].Equals("2")) ? "OBS" : "SO";
}
if ((v["SC"].Equals("3")) && (COUNT == 0))
{
COUNT = COUNT + 1;
d["RULE OF NGPC"] = (v["PT"].Equals("1") && v["SC"].Equals("3")) ? "OBS" : "SO";
}
if ((v["SC"].Equals("4")) && (COUNT == 0))
{
COUNT = COUNT + 1;
d["RULE OF NGPC"] = (v["PT"].Equals("1") && v["SC"].Equals("4")) ? "OBS" : "SO";
}
if ((v["SC"].Equals("5")) && (COUNT == 0))
{
COUNT = COUNT + 1;
d["RULE OF NGPC"] = (v["PT"].Equals("1") && v["SC"].Equals("5")) ? "OBS" : "SO";
}
}
// Copy the two input values through to the output record.
d["NGPC PT"] = v["PT"];
d["SC"] = v["SC"];
The code in C# above is not mine, I am trying to convert it to python.
I suggest you first initialize the column with 'OBS', then replace the values based on your conditions:
# Start from the standard value ...
df['output'] = 'OBS'
# ... then apply the rule from the C# code: a row stays 'OBS' only when
# PT is 5 or 6, or when PT is 1 and SC is one of 2, 3, 4, 5.
# NOTE(review): assumes 'NGPC PT' and 'SC' hold numbers; if they hold strings,
# compare against '5', '6', ... instead - confirm against the real data.
is_obs = df['NGPC PT'].isin([5, 6]) | ((df['NGPC PT'] == 1) & df['SC'].isin([2, 3, 4, 5]))
# Every other row becomes 'SO'.
df.loc[~is_obs, 'output'] = 'SO'
This may work if none of the conditions in the expression are wrong.

Visualise focus and interaction in Selenium

I want to see how the page is interacted with during my tests, e.g. what elements currently have the focus and where the interaction happens (similar to what the Cypress UI does).
How can I most conveniently achieve this in Selenium for Python?
Heavily inspired by the helper function from https://developer.mozilla.org/en-US/docs/Web/API/CSSStyleSheet/insertRule#Function_to_add_a_stylesheet_rule to add global stylesheet rules into a page I created add_css.js (in the subfolder helper_js):
/* global arguments */
/**
 * Adds global stylesheet rules to the current page.
 *
 * A new <style> element (class "PART-OF-SELENIUM-TESTING") is appended to
 * <head>, and one CSS rule is inserted into its sheet per entry of `rules`.
 *
 * Each entry is an array whose first item is the selector. The declarations
 * are either the remaining items of the entry, or - when the second item is
 * itself an array of arrays - the items of that second array. A declaration
 * is [property, value] or [property, value, important].
 */
(function (rules) {
  var styleEl = document.createElement("style");
  styleEl.classList.add("PART-OF-SELENIUM-TESTING");
  // The element has to be attached to the document before its sheet is read.
  document.head.appendChild(styleEl);
  var sheet = styleEl.sheet;

  for (var r = 0; r < rules.length; r++) {
    var entry = rules[r];
    var selector = entry[0];
    // Declarations are either wrapped in one array or listed after the selector.
    var wrapped = Array.isArray(entry[1][0]);
    var declarations = wrapped ? entry[1] : entry;
    var first = wrapped ? 0 : 1;

    var cssText = "";
    for (var d = first; d < declarations.length; d++) {
      var decl = declarations[d];
      var suffix = decl[2] ? " !important" : "";
      cssText += decl[0] + ": " + decl[1] + suffix + ";\n";
    }

    // Append the rule at the end of the sheet.
    sheet.insertRule(selector + "{" + cssText + "}", sheet.cssRules.length);
  }
}).apply(null, arguments);
which I then loaded and injected into the page using:
import pkgutil
# Load the JavaScript helper's source text from the "helper_js" package.
add_css = pkgutil.get_data("helper_js", "add_css.js").decode("utf8")
# ...
# Run the helper in the page. The list below is passed to the script as its
# single argument; each entry is [selector, [property, value, important]].
driver.execute_script(
add_css,
[
[":active", ["outline", "3px dashed red", True]],
[":focus", ["outline", "3px dashed yellow", True]],
[":active:focus", ["outline", "3px dashed orange", True]],
]
)
in order to add global styles for elements which are active or have focus.

Convert python to objective-c

So I'm trying to convert some Python to its Objective-C equivalent, but I'm not having much luck.
The Python code is as follows:
# Pick the next password to guess: score every candidate by the largest
# number of alternatives that refine() reports for it, and return the
# candidate with the smallest such score.
# `refine` and `distance` are defined elsewhere.
def get_next_guess(passwords):
scores = {}
for candidate in passwords:
results = []
# NOTE(review): this is a generator expression, not a list. The same
# generator object is iterated by the for loop below and also passed to
# refine(); if refine() iterates it, it consumes items the loop would
# otherwise see - confirm whether that is intended.
remainder = (x for x in passwords if x != candidate)
for correct_pw in remainder:
no_alternatives = len(refine(remainder, candidate, distance(candidate, correct_pw)))
print(no_alternatives)
results.append(no_alternatives)
scores[candidate] = max(results)
print(scores)
# The key with the lowest score.
return min(scores, key = lambda x: scores[x])
And my current Objective-C code is:
// Scores every candidate password by the largest number of alternatives that
// refine() reports for it, and logs the resulting table.
// `refine` and `distance` are defined elsewhere.
void get_next_guess(NSMutableArray * passwords)
{
    NSMutableDictionary * scores = [NSMutableDictionary new];
    for (NSString* candidate in passwords)
    {
        NSMutableArray * results = [NSMutableArray new];

        // All passwords except the candidate itself.
        // NOTE: != compares object pointers, not string contents.
        NSMutableArray * remainder = [NSMutableArray new];
        for (NSString * x in passwords)
        {
            if (x != candidate)
            {
                [remainder addObject:x];
            }
        }

        for (NSString * correct_pw in remainder)
        {
            NSUInteger no_alternatives = [refine(remainder, candidate, distance(candidate, correct_pw)) count];
            NSNumber *n = [NSNumber numberWithInteger:no_alternatives];
            [results addObject:n];
        }

        // Sort ascending so that the last object is the maximum.
        NSArray *sorted_Array = [results sortedArrayUsingDescriptors:
                                 @[[NSSortDescriptor sortDescriptorWithKey:@"intValue"
                                                                 ascending:YES]]];
        [scores setObject:[sorted_Array lastObject] forKey:candidate];
    }
    NSLog(@"table: %@", scores);
}
I appreciate that the obj-c code is very rough, I'm just trying to get something that will work. The code is part of a puzzle solver i'm trying to create.
I suspect the (main) problem is around the obj-c version of:
remainder = (x for x in passwords if x != candidate)
The Obj-c version returns this:
table: {
COELOMS = 4;
HOLLOES = 4;
MYOLOGY = 5;
PADLOCK = 5;
PARTONS = 4;
PILINGS = 6;
POMPONS = 5;
PRECESS = 6;
PROSECT = 4;
SALLOWS = 4;
TOOLERS = 5;
TROILUS = 6;
}
And the Python version returns this:
{'PARTONS': 3,
'HOLLOES': 3,
'PADLOCK': 4,
'TOOLERS': 4,
'COELOMS': 3,
'PROSECT': 3,
'MYOLOGY': 4,
'PRECESS': 0,
'TROILUS': 5,
'SALLOWS': 3,
'PILINGS': 4,
'POMPONS': 2}
(the Python output being correct)
Change if (x != candidate) to if (![x isEqualToString: candidate]).

Categories