Skip to content

Commit b53875f

Browse files
authored
[FLINK-36709][parquet] fix parquet can not read row with last column is array. (apache#25651)
1 parent c825e15 commit b53875f

File tree

2 files changed

+10
-12
lines changed

2 files changed

+10
-12
lines changed

flink-formats/flink-parquet/src/main/java/org/apache/flink/formats/parquet/utils/NestedPositionUtil.java

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -50,12 +50,10 @@ public static RowPosition calculateRowOffsets(
5050
int nullValuesCount = 0;
5151
BooleanArrayList nullRowFlags = new BooleanArrayList(0);
5252
for (int i = 0; i < fieldDefinitionLevels.length; i++) {
53+
// If a row's last field is an array, the repetition levels for the array's items will
54+
// be larger than the parent row's repetition level, so we need to skip those values.
5355
if (fieldRepetitionLevels[i] > rowRepetitionLevel) {
54-
throw new IllegalStateException(
55-
format(
56-
"In parquet's row type field repetition level should not larger than row's repetition level. "
57-
+ "Row repetition level is %s, row field repetition level is %s.",
58-
rowRepetitionLevel, fieldRepetitionLevels[i]));
56+
continue;
5957
}
6058

6159
if (fieldDefinitionLevels[i] >= rowDefinitionLevel) {

flink-formats/flink-parquet/src/test/java/org/apache/flink/formats/parquet/vector/ParquetColumnarRowSplitReaderTest.java

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -151,6 +151,7 @@ class ParquetColumnarRowSplitReaderTest {
151151
Collections.singletonList(
152152
new RowType.RowField("a", new IntType())))),
153153
RowType.of(
154+
new IntType(),
154155
new ArrayType(
155156
true,
156157
new RowType(
@@ -161,8 +162,7 @@ class ParquetColumnarRowSplitReaderTest {
161162
true,
162163
new ArrayType(
163164
true, new IntType()))),
164-
new RowType.RowField("c", new IntType())))),
165-
new IntType()));
165+
new RowType.RowField("c", new IntType()))))));
166166

167167
@SuppressWarnings("unchecked")
168168
private static final DataFormatConverters.DataFormatConverter<RowData, Row>
@@ -829,15 +829,15 @@ private List<Row> prepareNestedData(int rowNum) {
829829
new Map[] {null, mp1, mp2},
830830
new Row[] {Row.of(i), Row.of(i + 1), null},
831831
Row.of(
832+
i,
832833
new Row[] {
833834
Row.of(
834835
new Integer[][] {
835836
{i, i + 1, null}, {i, i + 2, null}, {}, null
836837
},
837838
i),
838839
null
839-
},
840-
i)));
840+
})));
841841
}
842842
return rows;
843843
}
@@ -888,15 +888,15 @@ private Path createNestedDataByOriginWriter(int rowNum, File tmpDir, int rowGrou
888888
row2.add(0, i + 1);
889889
f4.addGroup(0);
890890

891-
// add ROW<`f0` ARRAY<ROW<`b` ARRAY<ARRAY<INT>>, `c` INT>>, `f1` INT>>
891+
// add ROW<`f0` INT, `f1` ARRAY<ROW<`b` ARRAY<ARRAY<INT>>, `c` INT>>>
892892
Group f5 = row.addGroup("f5");
893-
Group arrayRow = f5.addGroup(0);
893+
f5.add(0, i);
894+
Group arrayRow = f5.addGroup(1);
894895
Group insideRow = arrayRow.addGroup(0).addGroup(0);
895896
Group insideArray = insideRow.addGroup(0);
896897
createParquetDoubleNestedArray(insideArray, i);
897898
insideRow.add(1, i);
898899
arrayRow.addGroup(0);
899-
f5.add(1, i);
900900
writer.write(row);
901901
}
902902
} catch (Exception e) {

0 commit comments

Comments
 (0)