Longest palindromic substring top down dynamic programming

Longest palindromic substring top down dynamic programming - python

Here is the algorithm for finding longest palindromic substring given a string s using bottom-up dynamic programming. So the algorithm explores all possible length j substring and checks whether it is a valid palindrome for j in 1 to n. The resulting time and space complexity is O(n^2).
def longestPalindrome(s):
n = len(s)
if n < 2:
return s
P = [[False for _ in range(n)] for _ in range(n)]
longest = s[0]
# j is the length of palindrome
for j in range(1, n+1):
for i in range(n-j+1):
# if length is less than 3, checking s[i] == s[i+j-1] is sufficient
P[i][i+j-1] = s[i] == s[i+j-1] and (j < 3 or P[i+1][i+j-2])
if P[i][i+j-1] and j > len(longest):
longest = s[i:i+j]
return longest
I am trying to implement the same algorithm in top-down approach with memoization.
Question:
Is it possible to convert this algorithm to top-down approach?
There are many questions about longest palindromic substring, but they are mostly using this bottom-up approach. The answer in https://stackoverflow.com/a/29959104/6217326 seems to be the closest to what I have in mind. But the answer seems to be using different algorithm from this one (and much slower).

Here is my solution recursively:
Start with i = 0, j = max length
if(i,j) is palindrome: then max substring length is j-1.
else do recursion with (i+1,j) and (i, j-1) and take the Max between these two.
Code will explain more.
The code is in Java, but I hope it will give the idea how to implement it. #zcadqe wanted the idea regarding how to implement in Top-down approach. I gave the idea and as a bonus also giving the code of java for better understanding. Anyone who knows python can easily convert the code!
public class LongestPalindromeSubstringWithSubStr {
static String str;
static int maxLen;
static int startLen;
static int endLen;
static int dp[][];// 0: not calculaed. 1: from index i to j is palindrome
static boolean isPal(int i, int j) {
if (dp[i][j] != 0) {
System.out.println("Res found for i:" + i + " j: " + j);
return (dp[i][j] == 1);
}
if (i == j) {
dp[i][j] = 1;
return true;
}
if (i + 1 == j) {// len 2
if (str.charAt(i) == str.charAt(j)) {
dp[i][j] = 1;
return true;
}
dp[i][j] = -1;
return false;
}
if (str.charAt(i) == str.charAt(j)) {
boolean res = isPal(i + 1, j - 1);
dp[i][j] = (res) ? 1 : 0;
return res;
}
dp[i][j] = 0;
return false;
}
// update if whole string from i to j is palindrome
static void longestPalCalc(int i, int j) {
if (isPal(i, j)) {
if (j - i + 1 > maxLen) {// update res
maxLen = j - i + 1;
startLen = i;
endLen = j;
}
} else {
longestPalCalc(i + 1, j);
longestPalCalc(i, j - 1);
}
}
public static void main(String[] args) {
str = "abadbbda";
dp = new int[str.length()][str.length()];
longestPalCalc(0, str.length() - 1);
System.out.println("Longest: " + maxLen);
System.out.println(str.subSequence(startLen, endLen + 1));
}
}

the problem with top down approach here is that it's hard to implement topological order . You cant run 2 for loops and use memoization with it, as this Topological order (2 for loops) gives substrings but it isn't the right T.O for palindrome as palindrome of 3 digit requires info about it's inside palindrome always(of 1 digit in this case).to know if a _ _ a is palindrome or not you must know whether _ _ is palindrome or not. Thus the Topo order you require is : x,x,xx,xx,xx,xxx,xxx,xxxx,xxxxx substrings of increasing length.
I'll post Top Down approach when I code or get one.

I tried to code Junaed's java code to Python and it's running quite well on Leetcode but is getting Memory Limit Exceeded on one of the test cases. See if we can somehow modify this further to get a better result or if I missed something in it, please do correct me.
def longestPalindrome(self, s: str) -> str:
#lru_cache(maxsize=None)
def dp(i,j):
if i==j:
return True
if i+1==j:
if s[i]==s[j]:
return True
return False
if s[i]==s[j]:
return dp(i+1,j-1)
return False
self.maxlen=0
#lru_cache(maxsize=None)
def dp2(i,j):
if dp(i,j):
if (j-i+1 > self.maxlen):
self.maxlen=j-i+1
self.ans=s[i:j+1]
else:
dp2(i+1,j)
dp2(i,j-1)
self.ans=""
i=0
j=len(s)-1
dp2(i,j)
return self.ans

This problem can be solved by adding memorization to the brute force approach,
We need to generate each substring this will take O(n^2) time, and
we need to check whether the generated substring is a palindrome, this will take an additional O(n),
in total it will be an O(n^3) time complexity.
Now, adding and storing the states that we already encountered to speed up the process, the time complexity can be reduced by O(n). So the total time complexity will be O(n^2)
here's the solution:
class Solution:
def longestPalindrome(self, s: str) -> str:
memo = {}
def isPalindrome(left,right):
state = (left, right)
if state in memo: return memo[state]
if left >= right: return True
if s[left] != s[right]: return False
memo[state] = isPalindrome(left+1, right-1)
return memo[state]
N = len(s)
result = ''
for i in range(N):
for j in range(i,N):
if (j-i+1) > len(result) and isPalindrome(i,j):
result = s[i:j+1]
return result

#include<iostream>
#include<string>
#include<vector>
using namespace std;
bool isPalindrome(string str, int startIdx, int stopIdx, vector<vector<int>>& T) {
const int i = startIdx;
const int j = stopIdx - 1;
if (i == (j + 1)) {
return true;
}
if (i >= j) {
return false;
}
if (T[i][j] == -1) {
if (str[i] == str[j]) {
T[i][j] = isPalindrome(str, startIdx + 1, stopIdx - 1, T);
}
else {
T[i][j] = 0;
}
}
return (T[i][j] == 1);
}
string getLongestStr(string str, int startIdx, int stopIdx, vector<vector<int>>& T) {
if (isPalindrome(str, startIdx, stopIdx, T)) {
return str.substr(startIdx, (stopIdx - startIdx));
}
else {
string str1 = getLongestStr(str, startIdx + 1, stopIdx, T);
string str2 = getLongestStr(str, startIdx, stopIdx - 1, T);
return str1.size() > str2.size() ? str1 : str2;
}
return "";
}
string getLongestStr(string str) {
const int N = str.size();
vector<vector<int>> T(N, vector<int>(N, -1));
return getLongestStr(str, 0, N, T);
}
int main() {
string str = "forgeeksskeegfor";
//string str = "Geeks";
cout << getLongestStr(str) << endl;
return 0;
}

Related

PowerSum time out test case

Please need your help, I got one failed test case due to time out if anyone can help me to improve the time taken by code to be executed. This problem is from HackerRank website if anyone needs more explanation I will refer the link of the problem in the comments below
from itertools import combinations
def powerSum(X, N,n=1,poss=[]):
if(n**N <= X):
poss.append(n)
n+=1
rslt = powerSum(X,N,n,poss)
else:
tmp=[]
for _ in range(len(poss)):
oc=combinations(poss,_+1)
for x in oc:
ok = sum([num**N for num in x])
if(ok == X):
tmp.append(ok)
return len(tmp)
return rslt

I am not good in python, but I hope below java code can be easily understood, This is a indirectly a variation of subset sum problem which is a dynamic programming problem where you have to find no. of ways to get a given particular sum given an array of values,so basically before applying subset problem, I have made a list of number which can be used in making the required sum by stopping at that number whose kth power exceed the x because starting from that natural number, further natural number are going to have much larger kth power value so no need of keeping them in our list so break there then it is just a dynamic programming problem as mentioned above where our list has value of kth power of valid natural number and we have to find the different way to get the sum x using those kth power values.
below is the code for more clear understanding
import java.util.*;
public class Main {
public static int find_it(int x , int n , List<Integer> a , int [][] dp){
for(int i = 0; i < n; ++i){
dp[i][0] = 1;
}
for(int i = 1; i <= n; ++i){
for(int j = 1; j <= x; ++j){
dp[i][j] += dp[i - 1][j];
if(j - a.get(i - 1) >= 0){
dp[i][j] += dp[i - 1][j - a.get(i - 1)];
}
}
}
return dp[n][x];
}
public static void main(String [] args){
Scanner input = new Scanner(System.in);
int x = input.nextInt() , k = input.nextInt();
List<Integer> a = new ArrayList<>();
for(int i = 1; ; ++i){
double value = Math.pow(i , k);
if(value > x){
break;
}
a.add((int)value);
}
int n = a.size();
int [][]dp = new int[n + 1][x + 1];
int answer = find_it(x , n , a , dp);
System.out.println(answer);
input.close();
}
}

Leetcode 200. Number of Islands TLE

Link to the question: https://leetcode.com/problems/number-of-islands/
Given a 2d grid map of '1's (land) and '0's (water), count the number of islands. An island is surrounded by water and is formed by connecting adjacent lands horizontally or vertically. You may assume all four edges of the grid are all surrounded by water.
Example 1:
Input:
11110
11010
11000
00000
Output: 1
My logic is to simply do dfs from every node and keep track of the connected components.
Am getting Time Limit Exceeded (TLE) for the 46th test case, can someone help me optimize this code?
class Solution(object):
def numIslands(self, grid):
def in_grid(x, y):
return 0 <= x < len(grid) and 0 <= y < len(grid[0])
def neighbours(node):
p, q = node
dir = [(-1, 0), (0, 1), (1, 0), (0, -1)]
return [(x, y) for x, y in [(p + i, q + j) for i, j in dir] if in_grid(x, y)]
def dfs(node):
visited.append(node)
for v in neighbours(node):
if grid[v[0]][v[1]]== "1" and v not in visited:
dfs(v)
components = 0
visited = []
for i in range(len(grid)):
for j in range(len(grid[0])):
node = (i, j)
if grid[i][j] == "1" and node not in visited:
components += 1
dfs(node)
return components

I think your approach is correct. However you are using visited as a list which takes O(n) to search a value. So its overall time complexity is O(N^2). I would suggest to use set rather than list which is a hash table.
There is just two parts to revise:
visited = [] -> visited = set()
visited.append(node) ->visited.add(node)
I confirmed that it is accepted. Now node not in visited takes O(1) so the overall time complexity is O(N).
% Like most of other LeetCode problems, the problem statement does not give any information about input data. But as your code is TLE so we can assume that we cannot solve it with time complexity O(n^2).

The reason you are getting TLE is that you are using a list to keep a track of visited nodes. The search of a value in a list takes O(n) time in the worst case.
It's optimal to keep the index status of a visited/non-visited node as a 2D matrix containing boolean values or O/1 integer values. This leads to a constant access time O(1) to find the visited/non-visited status of a node.
class Solution {
boolean isSafe(char[][] grid, int[][] visited, int i, int j)
{
int n = grid.length;
int m = grid[0].length;
if((i<0 || i>n-1) || (j<0 || j>m-1))
return false;
return visited[i][j] == 0 && grid[i][j] == '1';
}
void DFS(char[][] grid, int[][] visited, int i, int j)
{
visited[i][j] = 1; //marked visited
int[] row = {-1, 0, 1, 0};
int[] column = {0, 1, 0, -1};
for(int k = 0; k<4; k++)
{
if(isSafe(grid, visited, i+row[k], j+column[k]))
{
DFS(grid, visited, i+row[k], j+column[k]);
}
}
}
int DFSUtil(char[][] grid)
{
int count = 0;
if(grid == null || grid.length == 0)
return count;
int n = grid.length; //rows
int m = grid[0].length; //columns
int[][] visited = new int[n][m];
for(int i = 0; i<n; i++)
for(int j = 0; j<m; j++)
{
if(grid[i][j]=='1' && visited[i][j] == 0)
{
DFS(grid, visited, i, j);
count++;
}
}
return count;
}
public int numIslands(char[][] grid) {
int result = DFSUtil(grid);
return result;
}
}

I solved it in Java by a DFS approach, it's simple and easy to understand. Here, my code may help you:
public static int numIslands(char[][] grid) {
int countOfIslands = 0 ;
for (int i = 0; i <grid.length ; i++) {
for (int j = 0; j <grid[i].length ; j++) {
if(grid[i][j] == '1'){
DFS(grid,i,j);
countOfIslands++;
}
}
}
return countOfIslands;
}
public static void DFS(char[][] grid , int row , int col){
if(grid[row][col] == '0')
return;
grid[row][col] = '0';
// System.out.println("grid = " + Arrays.deepToString(grid) + ", row = " + row + ", col = " + col);
if(row+1 < grid.length)
DFS(grid,row+1,col);
if(row-1 >=0)
DFS(grid,row-1,col);
if(col+1 <grid[0].length)
DFS(grid,row,col+1);
if(col-1 >= 0)
DFS(grid,row,col-1);
}
Reference for if this is your first time hearing about the DFS for a graph:
DFS Approach

Modified Simple DFS Solution
class Solution {
public int numIslands(char[][] grid) {
int count = 0;
for (int i = 0; i < grid.length; i++) {
for (int j = 0; j < grid[i].length; j++) {
if (grid[i][j] != '0') {
count++;
shrink(grid, i, j);
}
}
}
return count;
}
private void shrink(char[][] grid, int i, int j) {
if (i < 0 || j < 0 || i >= grid.length || j >= grid[0].length || grid[i][j] ==
'0')
return;
grid[i][j] = '0';
shrink(grid, i, j+1);
shrink(grid, i, j-1);
shrink(grid, i+1, j);
shrink(grid, i-1, j);
}
}

Why is Python returning None instead of value [duplicate]

This question already has answers here:
Why does my recursive function return None?
(4 answers)
Closed 4 years ago.
another one of these stupid questions.
I have a very simpe algorithm here to calculate the greatest commen divider.
My C++ snippet looks like this
int findGcd(int m, int n)
{
int r = m % n;
if(r == 0)
{
return n;
}
else
{
m = n;
n = r;
findGcd(m, n);
}
}
int main()
{
cout << findGcd(13, 3);
return 0;
}
... And it returns (exactly as expected in this example) 1.
If I implement it in Python though - like the following:
def findGcd(m, n):
"""Calculates the greatest common divider """
r = m % n
if r == 0:
return n
else:
m = n
n = r
findGcd(m, n)
number = findGcd(13,3)
print(number)
It just returns NONE instead of 1. I already debugged it and n did indeed have the correct value of 1 stored in it but returns None none the less.
I can fix this by adding "return" to my recursive call of the function in the else branch.
But why is that?

In both cases, you need a return in the recursive call.
Without that, in C++, you have undefined behavior. In Python, you get None.
C++
int findGcd(int m, int n)
{
int r = m % n;
if(r == 0)
{
return n;
}
else
{
m = n;
n = r;
return findGcd(m, n);
}
}
Python:
def findGcd(m, n):
"""Calculates the greatest common divider """
r = m % n
if r == 0:
return n
else:
m = n
n = r
return findGcd(m, n)
You catch such problems in C++ by increasing the warning level on your compiler. With g++ -Wall, I get:
socc.cc: In function ‘int findGcd(int, int)’:
socc.cc:16:1: warning: control reaches end of non-void function [-Wreturn-type]
}
^

maximum of gcd of huge list of number [duplicate]

what is the fastest way to compute the greatest common divisor of n numbers?

Without recursion:
int result = numbers[0];
for(int i = 1; i < numbers.length; i++){
result = gcd(result, numbers[i]);
}
return result;
For very large arrays, it might be faster to use the fork-join pattern, where you split your array and calculate gcds in parallel. Here is some pseudocode:
int calculateGCD(int[] numbers){
if(numbers.length <= 2){
return gcd(numbers);
}
else {
INVOKE-IN-PARALLEL {
left = calculateGCD(extractLeftHalf(numbers));
right = calculateGCD(extractRightHalf(numbers));
}
return gcd(left,right);
}
}

You may want to sort the numbers first and compute the gcd recursively starting from the smallest two numbers.

C++17
I have written this function for calculating gcd of n numbers by using C++'s inbuilt __gcd(int a, int b) function.
int gcd(vector<int> vec, int vsize)
{
int gcd = vec[0];
for (int i = 1; i < vsize; i++)
{
gcd = __gcd(gcd, vec[i]);
}
return gcd;
}
To know more about this function visit this link .
Also refer to Dijkstra's GCD algorithm from the following link. It works without division. So it could be slightly faster (Please correct me if I am wrong.)

You should use Lehmer's GCD algorithm.

How about the following using Euclidean algorithm by subtraction:
function getGCD(arr){
let min = Math.min(...arr);
let max= Math.max(...arr);
if(min==max){
return min;
}else{
for(let i in arr){
if(arr[i]>min){
arr[i]=arr[i]-min;
}
}
return getGCD(arr);
}
}
console.log(getGCD([2,3,4,5,6]))
The above implementation takes O(n^2) time. There are improvements that can be implemented but I didn't get around trying these out for n numbers.

If you have a lot of small numbers, factorization may be actually faster.
//Java
int[] array = {60, 90, 45};
int gcd = 1;
outer: for (int d = 2; true; d += 1 + (d % 2)) {
boolean any = false;
do {
boolean all = true;
any = false;
boolean ready = true;
for (int i = 0; i < array.length; i++) {
ready &= (array[i] == 1);
if (array[i] % d == 0) {
any = true;
array[i] /= d;
} else all = false;
}
if (all) gcd *= d;
if (ready) break outer;
} while (any);
}
System.out.println(gcd);
(works for some examples, but not really tested)

Use the Euclidean algorithm :
function gcd(a, b)
while b ≠ 0
t := b;
b := a mod b;
a := t;
return a;
You apply it for the first two numbers, then the result with the third number, etc... :
read(a);
read(b);
result := gcd(a, b);
i := 3;
while(i <= n){
read(a)
result := gcd(result, a);
}
print(result);

Here below is the source code of the C program to find HCF of N numbers using Arrays.
#include<stdio.h>
int main()
{
int n,i,gcd;
printf("Enter how many no.s u want to find gcd : ");
scanf("%d",&n);
int arr[n];
printf("\nEnter your numbers below :- \n ");
for(i=0;i<n;i++)
{
printf("\nEnter your %d number = ",i+1);
scanf("%d",&arr[i]);
}
gcd=arr[0];
int j=1;
while(j<n)
{
if(arr[j]%gcd==0)
{
j++;
}
else
{
gcd=arr[j]%gcd;
i++;
}
}
printf("\nGCD of k no.s = %d ",gcd);
return 0;
}
For more refer to this website for further clarification.......

You can use divide and conquer. To calculate gcdN([]), you divide the list into first half and second half. if it only has one num for each list. you calculate using gcd2(n1, n2).
I just wrote a quick sample code. (assuming all num in the list are positive Ints)
def gcdN(nums):
n = len(nums)
if n == 0: return "ERROR"
if n == 1: return nums[0]
if n >= 2: return gcd2(gcdN(nums[:n//2]), gcdN(nums[n//2:]))
def gcd2(n1, n2):
for num in xrange(min(n1, n2), 0, -1):
if n1 % num == 0 and n2 % num == 0:
return num

Here's a gcd method that uses the property that gcd(a, b, c) = gcd(a, gcd(b, c)).
It uses BigInteger's gcd method since it is already optimized.
public static BigInteger gcd(BigInteger[] parts){
BigInteger gcd = parts[0];
for(int i = 1; i < parts.length; i++)
gcd = parts[i].gcd(gcd);
return gcd;
}

//Recursive solution to get the GCD of Two Numbers
long long int gcd(long long int a,long long int b)<br>
{
return b==0 ? a : gcd(b,a%b);
}
int main(){
long long int a,b;
cin>>a>>b;
if(a>b) cout<<gcd(a,b);
else cout<<gcd(b,a);
return 0;
}

import java.io.*;
import java.util.*;
import java.text.*;
import java.math.*;
import java.util.regex.*;
class GCDArray{
public static int [] extractLeftHalf(int [] numbers)
{
int l =numbers.length/2;
int arr[] = Arrays.copyOf(numbers, l+1);
return arr;
}
public static int [] extractRightHalf(int [] numbers)
{
int l =numbers.length/2;
int arr[] = Arrays.copyOfRange(numbers,l+1, numbers.length);
return arr;
}
public static int gcd(int[] numbers)
{
if(numbers.length==1)
return numbers[0];
else {
int x = numbers[0];
int y = numbers[1];
while(y%x!=0)
{
int rem = y%x;
y = x;
x = rem;
}
return x;
}
}
public static int gcd(int x,int y)
{
while(y%x!=0)
{
int rem = y%x;
y = x;
x = rem;
}
return x;
}
public static int calculateGCD(int[] numbers){
if(numbers.length <= 2){
return gcd(numbers);
}
else {
int left = calculateGCD(extractLeftHalf(numbers));
int right = calculateGCD(extractRightHalf(numbers));
return gcd(left,right);
}
}
public static void main(String[] args) {
Scanner sc = new Scanner(System.in);
int n = sc.nextInt();
int arr[] = new int[n];
for(int i=0;i<n;i++){
arr[i]=sc.nextInt();
}
System.out.println(calculateGCD(arr));
}
}
**
Above is the java working code ..... the pseudo code of which is
already mention by https://stackoverflow.com/users/7412/dogbane
**

A recursive JavaScript (ES6) one-liner for any number of digits.
const gcd = (a, b, ...c) => b ? gcd(b, a % b, ...c) : c.length ? gcd(a, ...c) : Math.abs(a);

This is what comes off the top of my head in Javascript.
function calculateGCD(arrSize, arr) {
if(!arrSize)
return 0;
var n = Math.min(...arr);
for (let i = n; i > 0; i--) {
let j = 0;
while(j < arrSize) {
if(arr[j] % i === 0) {
j++;
}else {
break;
}
if(j === arrSize) {
return i;
}
}
}
}
console.log(generalizedGCD(4, [2, 6, 4, 8]));
// Output => 2

Here was the answer I was looking for.
The best way to find the gcd of n numbers is indeed using recursion.ie gcd(a,b,c)=gcd(gcd(a,b),c). But I was getting timeouts in certain programs when I did this.
The optimization that was needed here was that the recursion should be solved using fast matrix multiplication algorithm.

find Minimum Window Substring

Given a string S and a string T, find the minimum window in S which will contain all the characters in T in complexity O(n).
For example,
S = "ADOBECODEBANC"
T = "ABC"
Minimum window is "BANC".
Update,
I read one implementation from http://articles.leetcode.com/2010/11/finding-minimum-window-in-s-which.html, and the implementation seems wrong, which does not decrease count, and also does not move begin when find a matched window? Thanks.
// Returns false if no valid window is found. Else returns
// true and updates minWindowBegin and minWindowEnd with the
// starting and ending position of the minimum window.
bool minWindow(const char* S, const char *T,
int &minWindowBegin, int &minWindowEnd) {
int sLen = strlen(S);
int tLen = strlen(T);
int needToFind[256] = {0};
for (int i = 0; i < tLen; i++)
needToFind[T[i]]++;
int hasFound[256] = {0};
int minWindowLen = INT_MAX;
int count = 0;
for (int begin = 0, end = 0; end < sLen; end++) {
// skip characters not in T
if (needToFind[S[end]] == 0) continue;
hasFound[S[end]]++;
if (hasFound[S[end]] <= needToFind[S[end]])
count++;
// if window constraint is satisfied
if (count == tLen) {
// advance begin index as far right as possible,
// stop when advancing breaks window constraint.
while (needToFind[S[begin]] == 0 ||
hasFound[S[begin]] > needToFind[S[begin]]) {
if (hasFound[S[begin]] > needToFind[S[begin]])
hasFound[S[begin]]--;
begin++;
}
// update minWindow if a minimum length is met
int windowLen = end - begin + 1;
if (windowLen < minWindowLen) {
minWindowBegin = begin;
minWindowEnd = end;
minWindowLen = windowLen;
} // end if
} // end if
} // end for
return (count == tLen) ? true : false;
}

Assume that String S and T only contains A-Z characters (26 characters)
First, create an array count, which store the frequency of each characters in T.
Process each character in S, maintaining a window l, r, which will be the current minimum window that contains all characters in T.
We maintain an array cur to store the current frequency of characters in window. If the frequency of the character at the left end of the window is greater than needed frequency, we increase l
Sample Code:
int[]count = new int[26];
for(int i = 0; i < T.length; i++)
count[T[i] - 'A']++;
int need = 0;//Number of unique characters in T
for(int i = 0; i < 26; i++)
if(count[i] > 0)
need++;
int l = 0, r = 0;
int count = 0;
int result ;
int[]cur = new int[26];
for(int i = 0; i < S.length; i++){
cur[S[i] - 'A']++;
r = i;
if(cur[S[i] - 'A'] == count[S[i] - `A`]){
count++;
}
//Update the start of the window,
while(cur[S[l] - 'A'] > count[S[l] - 'A']){
cur[S[l] - 'A']--;
l++;
}
if(count == need)
result = min(result, r - l + 1);
}
Each character in S will be processed at most two times, which give us O(n) complexity.

def minWindow(self, s, t):
"""
:type s: str
:type t: str
:rtype: str
"""
count = len(t)
require = [0] * 128
chSet = [False] * 128
for i in range(count):
require[ord(t[i])] += 1
chSet[ord(t[i])] = True
i = -1
j = 0
minLen = 999999999
minIdx = 0
while i < len(s) and j < len(s):
if count > 0:
i += 1
if i == len(s):
index = 0
else:
index = ord(s[i])
require[index] -= 1
if chSet[index] and require[index] >=0:
count -= 1
else:
if minLen > i - j + 1:
minLen = i - j + 1
minIdx = j
require[ord(s[j])] += 1
if chSet[ord(s[j])] and require[ord(s[j])] > 0:
count += 1
j += 1
if minLen == 999999999:
return ""
return s[minIdx:minIdx+minLen]
The method I used was to map the characters and how many are in the substring vs how many are needed. If all the values are non-negative, then you can remove characters from the start of the substring until you reach a negative, and if there's a negative, you add to the end of the substring until it is 0 again. You continue this until you've reached the end of S, and then remove characters until you have a negative count for one of the characters.
Going through the example, S="ADOBECODEBANC" and T="ABC". Starting out, the map has the values A=-1, B=-1, C=-1, and has a count of 3 negatives. Adding the first letter increases A to 0, which removes a negative, leaving a count of 2. You can count the others as well, since they will never become negative, resulting in A=0,B=0,C=0,D=1,O=1,E=1 when you add the C. Since the negative count is 0, you start removing characters from the start, which is A, dropping it to -1, and switching back to adding at the end.
You then add to the end until you reach an A again, which results in A=0,B=1,C=0,D=2,E=2,O=2 and a count of 0. Remove from the start until you reach a negative again, which removes D,O,B,E,C, since B's removal only drops it to 0, not a negative. At that point, the substring is "ODEBA" and C = -1. Add to the end until you reach a C and you have "ODEBANC", and remove from the start until you get a negative again, leaving "ANC". You've reached the end of the string and have a negative, so there is no shorter string remaining with all the characters.
You can retrieve the shortest substring by taking the start and end indices of the mapped substring whenever you switch from removing to adding and storing them if they are shorter than the previous shortest. If you never switch from removing to adding, then the result is the empty string.
If S="BANC" and T="ABC", then the result is adding until you reach "BANC", switching to remove, hitting a negative (and therefore copying those lengths at 0 and 3), and attempting to add beyond the end which ends the algorithm with the substring starting at 0 and ending at 3.
As every character gets adding once and removed once or less, it takes 2n steps at most to complete the algorithm, an O(n) solution.
Idea from mike3

You can try this method:
Create a hash of T (because order of characters in t does not matter, we will use its hash)
Now take two pointers (to iterate through S), both indexed at 0 to begin with. Let their names be i,j.
Increment j at each step and calculate hash of S as you move forward. When this hash covers hash of T (of course you will need to compare the two hashes at each step), start to increment i (and decrement hash values in hash of S) until hash remains covered.
When hash of S < hash of T, start again by incrementing j.
At any point, the least window size of i..j that covers hash of T is your answer.
PS: take care of the corner cases, like end of string and all. I'll help if you need the code, but I'd recommend if you try it yourself first and then ask doubts.

A more 'pythonic' approach on the algorithm explained in http://articles.leetcode.com/2010/11/finding-minimum-window-in-s-which.html
In short: Use head and tail 'pointers', advance head until match is found, then advance tail (decrease the size of the window), while the substring still matches.
import collections
def windows(S, T):
# empty string/multiset is matched everywhere
if not T:
yield(0, 0)
return
# target multiset initialized to contents of T
target_ms = collections.Counter(T)
# empty test multiset
test_ms = collections.Counter()
head = enumerate(S)
tail = enumerate(S)
# while the condition is not met, advance head
# and add to the test multiset
# iterate over the whole input with head
for i_head, char_head in head:
test_ms[char_head] += 1
# while the condition is met, advance tail
# (and subtract from test multiset)
# (a - b) for Counters has only elements from a that
# remained >0 after subtraction
while not target_ms - test_ms:
i_tail, char_tail = tail.next()
yield (i_tail, i_head + 1)
test_ms[char_tail] -= 1
def min_window(S, T):
# initialize
min_len = len(S) + 1
min_start, min_end = None, None
# go through all matching windows, pick the shortest
for start, end in windows(S, T):
if end - start < min_len:
min_start, min_end = start, end
min_len = end - start
return (min_start, min_end)

My C++ solution that runs in O(n) time (accepted solution at leetcode, runs faster than 99% of submitted C++ solutions):
#include<string>
#include<vector>
using namespace std;
class CharCounter
{
private:
const int fullCount;
int currentCount;
vector<pair<short, short>> charMap;
public:
CharCounter(const string &str) :fullCount(str.size()), currentCount(0) {
charMap = vector<pair<short, short>>(128, { 0,0 });
for (const auto ch : str) {
charMap[ch].second++;
}
};
void reset() {
for (auto &entry : charMap)
entry.first = 0;
currentCount = 0;
}
bool complete() const {
return (currentCount == fullCount);
}
void add(char ch) {
if (charMap[ch].second > 0) {
if (charMap[ch].first < charMap[ch].second)
currentCount++;
charMap[ch].first++;
}
}
void subtract(char ch) {
if (charMap[ch].second > 0) {
if (charMap[ch].first <= charMap[ch].second)
currentCount--;
charMap[ch].first--;
}
}
};
class Solution
{
public:
string minWindow(string s, string t) {
if ((s.size() < 1) || (t.size() < 1))
return "";
CharCounter counter(t);
pair<size_t, size_t> shortest = { 0, numeric_limits<size_t>::max() };
size_t beg = 0, end = 0;
while (end < s.size()) {
while ((end < s.size()) && (!counter.complete())) {
counter.add(s[end]);
if (counter.complete())
break;
end++;
}
while (beg < end) {
counter.subtract(s[beg]);
if (!counter.complete()) {
counter.add(s[beg]);
break;
}
beg++;
}
if (counter.complete()) {
if ((end - beg) < shortest.second - shortest.first) {
shortest = { beg, end };
if (shortest.second - shortest.first + 1 == t.size())
break;
}
if (end >= s.size() - 1)
break;
counter.subtract(s[beg++]);
end++;
}
}
return s.substr(shortest.first, shortest.second - shortest.first + 1);
}
};
The idea is simple: iterate the source string (s) char by char using two "pointers", beg and end. Add every char encountered at the end. As soon as all chairs contained in t added, update the shortest interval. Increment left pointer beg and subtract the left char from counter.
Example of usage:
int main()
{
Solution fnder;
string str = "figehaeci";
string chars = "aei";
string shortest = fnder.minWindow(str, chars); // returns "aeci"
}
The only purpose of the CharCounter is to count encountered chars contained in t.

We Keep Coding

Python is a programming language that lets you work quickly and integrate systems more effectively.

Longest palindromic substring top down dynamic programming - python

Related

PowerSum time out test case

Leetcode 200. Number of Islands TLE

Why is Python returning None instead of value [duplicate]

maximum of gcd of huge list of number [duplicate]

find Minimum Window Substring

Categories

Resources