
Read Time:193 Minute, 26 Second
Combining Arrays¶
Vertical Stack¶
Given two or more existing arrays with the same number of columns, you can stack them vertically (row-wise) using the vstack() function.
In [116]:
from numpy import array
from numpy import vstack
# First 3x3 matrix.
a1 = array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
print("numpy array a1\n", a1)

# Second 3x3 matrix; it has the same number of columns as a1.
a2 = array([[10, 11, 12], [13, 14, 15], [16, 17, 18]])
print("numpy array a2\n", a2)

# vstack places the rows of a2 underneath the rows of a1.
a3 = vstack([a1, a2])
print("Vertical Stack a1, a2\n", a3)
print("Vertical Stack a1, a2 Shape\n", a3.shape)
Horizontal Stack¶
Given two or more existing arrays with the same number of rows, you can stack them horizontally (column-wise) using the hstack() function.
In [117]:
from numpy import array
from numpy import hstack
# Left-hand matrix: 3 rows, 4 columns.
a1 = array([[1, 2, 3, 20], [4, 5, 6, 30], [7, 8, 9, 40]])
print("numpy array a1\n", a1)

# Right-hand matrix: 3 rows, 3 columns. Only the row counts have to match.
a2 = array([[10, 11, 12], [13, 14, 15], [16, 17, 18]])
print("numpy array a2\n", a2)

# hstack appends the columns of a2 to the right of the columns of a1.
a3 = hstack([a1, a2])
print("Horizontal Stack a1, a2\n", a3)
print("Horizontal Stack a1, a2 Shape\n", a3.shape)
Convert a one-dimensional list of data to a NumPy array¶
- Calling the array() NumPy function.
In [77]:
from numpy import array
# Plain Python list of values. It keeps its own name so that `data` only
# ever refers to the NumPy array and never changes type within the cell.
data_list = [11, 20, 30, 50]
# array() builds a one-dimensional ndarray from the list contents.
data = array(data_list)
print(data)
print(type(data))
Convert a two-dimensional list of data to a NumPy array¶
- Machine learning data is usually two-dimensional; each row represents a new observation and each column represents a new feature.
- In Python, two-dimensional data is commonly held as a "list of lists"; each inner list represents a new observation.
- Convert a "list of lists" to a NumPy array by calling the array() function.
In [78]:
from numpy import array
# List of lists: each inner list is one row. It keeps its own name so that
# `data` only ever refers to the NumPy array and never changes type.
data_rows = [[11, 12, 13],
             [34, 35, 36],
             [55, 66, 77]]
# array() turns the nested list into a two-dimensional ndarray (3 rows, 3 columns).
data = array(data_rows)
print(data)
print(type(data))
NumPy Array Indexing¶
In [79]:
from numpy import array
# One-dimensional array holding five values.
data = array([12, 23, 56, 10, 5])
# Indexes are zero-based: show the first and then the fourth element.
for position in (0, 3):
    print(data[position])
In [80]:
from numpy import array
# One-dimensional array holding five values.
data = array([12, 23, 56, 10, 5])
# Negative indexes count from the end: -1 is the last item, -3 the third from last.
last_item = data[-1]
third_from_last = data[-3]
print(last_item, third_from_last, sep="\n")
In [81]:
from numpy import array
# Two-dimensional array: 3 rows, 3 columns.
data = array([[11, 12, 45], [34, 10, 67], [87, 9, 20]])
# A (row, column) pair selects a single element; here the top-left one.
row, column = 0, 0
print(data[row, column])
In [82]:
from numpy import array
# Two-dimensional array: 3 rows, 3 columns.
data = array([[11, 12, 45], [34, 10, 67], [87, 9, 20]])
# Row 0 with every column.
first_row = data[0, :]
print("all items in the first row", first_row)
# Every row, column 0 only.
first_column = data[:, 0]
print("all items in the first column", first_column)
NumPy Array Slicing¶
- This is most useful in machine learning when specifying input variables and output variables, or splitting training rows from testing rows.
- data[from:to]; The slice extends from the "from" index and ends one item before the "to" index.
One-Dimensional Slicing¶
In [83]:
from numpy import array
# Slicing a one-dimensional array.
# One-dimensional array holding six values.
data = array([90, 45, 78, 43, 1, 10])
# The slice starts at index 1 and stops one item before index 5.
window = slice(1, 5)
print(data[window])
In [84]:
from numpy import array
# Negative slicing of a one-dimensional array.
# One-dimensional array holding six values.
data = array([90, 45, 78, 43, 1, 10])
# Start at -3 (the third-from-last item) and stop one item before -1 (the last item).
start, stop = -3, -1
print(data[start:stop])
In [85]:
from numpy import array
# Open-ended negative slicing of a one-dimensional array.
# One-dimensional array holding six values.
data = array([90, 45, 78, 43, 1, 10])
# Start at -3 (the third-from-last item); leaving out the "to" index
# runs the slice through to the end of the dimension.
last_three = data[-3:]
print(last_three)
Two-Dimensional Slicing¶
- In machine learning, it is common to split your loaded data into input variables (X) and the output variable (y).
- In machine learning, it is also common to split a loaded dataset into separate train and test sets.
In [86]:
# split your loaded data into input variables (X) and the output variable (y).
from numpy import array
# Two-dimensional array: 3 rows, 3 columns.
data = array([[11, 12, 45], [34, 10, 67], [87, 9, 20]])
# X keeps every row and every column except the last one;
# y keeps every row of the last column only.
X, y = data[:, :-1], data[:, -1]
print("Input variables (X)\n", X)
print("Output variable (y)\n", y)
In [87]:
# split a loaded dataset into separate train and test sets.
from numpy import array
# Two-dimensional array: 7 rows, 3 columns.
data = array([
    [11, 12, 45],
    [34, 10, 67],
    [87, 9, 20],
    [2, 31, 56],
    [1, 43, 5],
    [36, 21, 9],
    [20, 4, 1],
])
# Row index at which the dataset is cut in two.
split = 5
# Training data: rows 0 up to, but not including, the split row, with all columns.
# Test data: the split row through to the last row, with all columns.
train, test = data[:split], data[split:]
print("training data\n", train)
print("test data\n", test)
Array Reshaping¶
- After slicing your data, you may need to reshape it.
- It is important to know how to reshape your NumPy arrays so that your data meets the expectations of specific Python libraries; for example, the Long Short-Term Memory (LSTM) recurrent neural network in Keras requires input to be specified as a three-dimensional array comprised of samples, timesteps, and features.
Data Shape¶
NumPy arrays have a "shape" attribute that returns a tuple of the length of each dimension of the array.
In [88]:
from numpy import array
# One-dimensional array holding six values.
data = array([90, 45, 78, 43, 1, 10])
# For a one-dimensional array, "shape" is a one-element tuple.
shape = data.shape
print(shape)
In [89]:
from numpy import array
# Two-dimensional array: 7 rows, 3 columns.
data = array([
    [11, 12, 45],
    [34, 10, 67],
    [87, 9, 20],
    [2, 31, 56],
    [1, 43, 5],
    [36, 21, 9],
    [20, 4, 1],
])
# For a two-dimensional array, "shape" is a (rows, columns) tuple.
shape = data.shape
print(shape)
In [90]:
from numpy import array
# Two-dimensional array: 7 rows, 3 columns.
data = array([
    [11, 12, 45],
    [34, 10, 67],
    [87, 9, 20],
    [2, 31, 56],
    [1, 43, 5],
    [36, 21, 9],
    [20, 4, 1],
])
# Unpack the shape tuple: the first entry is the row count, the second the column count.
row_count, column_count = data.shape
print("The number of rows: %d" % row_count)
print("The number of columns: %d" % column_count)
Reshape 1D to 2D Array¶
- Reshape a one-dimensional array into a two-dimensional array with multiple rows and one column.
In [91]:
from numpy import array
# One-dimensional array with 6 elements (shape (6,)).
data = array([90, 45, 78, 43, 1, 10])
# A one-dimensional array has elements rather than rows, so the message says so.
print("Before reshape; one-dimensional array with 6 elements.")
print(data)
print(data.shape)
# Reshape the one-dimensional array into a two-dimensional array with
# 6 rows and 1 column; data.shape[0] is the number of elements.
data = data.reshape((data.shape[0], 1))
print("\nAfter reshape; two-dimensional array with 6 rows and 1 column.")
print(data)
print(data.shape)
Reshape 2D to 3D Array¶
- It is common to need to reshape two-dimensional data where each row represents a sequence into a three-dimensional array for algorithms that expect multiple samples of one or more time steps and one or more features.
In [31]:
from numpy import array
# Two-dimensional array: 7 rows, 3 columns.
data = array([[11, 12, 45],
              [34, 10, 67],
              [87, 9, 20],
              [2, 31, 56],
              [1, 43, 5],
              [36, 21, 9],
              [20, 4, 1]])
print("Before reshape:")
print("two-dimensional array with 7 rows, 3 columns.")
print(data.shape)
print(data)
# Reshape the two-dimensional data into a three-dimensional array by adding
# a trailing axis of length n. Every original row becomes one sample that
# holds 3 rows and 1 column.
# Read as LSTM input (samples, time steps, features):
#   data.shape[0] : the number of samples
#   data.shape[1] : the number of time steps
#   n             : the number of features, fixed to 1 here
n = 1
data = data.reshape((data.shape[0], data.shape[1], n))
print("\nAfter reshape:")
print("three-dimensional array made of 7 blocks, each with 3 rows and 1 column.")
print("three-dimensional array can provide 7 samples")
print("each sample with 3 rows and 1 column.")
print(data.shape)
print(data)
Reshape 3D to 4D Array¶
In [32]:
from numpy import array
# Three-dimensional array: the extra pair of brackets wraps one 7x3 block,
# so the shape is (1, 7, 3).
data = array([[[11, 12, 45],
               [34, 10, 67],
               [87, 9, 20],
               [2, 31, 56],
               [1, 43, 5],
               [36, 21, 9],
               [20, 4, 1]]])
print("Before reshape: three-dimensional array")
print(data.shape)
print(data)
# Add a trailing axis of length n to go from three to four dimensions.
n = 1
data = data.reshape((data.shape[0], data.shape[1], data.shape[2], n))
print("\nAfter reshape: four-dimensional array")
print(data.shape)
print(data)
In [33]:
from numpy import array
# Three-dimensional array: two 7x3 blocks, so the shape is (2, 7, 3).
data = array([[[11, 12, 45],
               [34, 10, 67],
               [87, 9, 20],
               [2, 31, 56],
               [1, 43, 5],
               [36, 21, 9],
               [20, 4, 1]],
              [[1, 2, 3],
               [4, 5, 6],
               [7, 8, 9],
               [10, 11, 12],
               [13, 14, 15],
               [36, 21, 9],
               [20, 4, 1]]])
print("Before reshape: three-dimensional array")
print(data.shape)
print(data)
# Add a trailing axis of length n to go from three to four dimensions.
n = 1
data = data.reshape((data.shape[0], data.shape[1], data.shape[2], n))
print("\nAfter reshape: four-dimensional array")
print(data.shape)
print(data)
In [ ]: